author | Dimitry Andric <dim@FreeBSD.org> | 2015-06-21 13:59:01 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-06-21 13:59:01 +0000
commit | 3a0822f094b578157263e04114075ad7df81db41 (patch)
tree | bc48361fe2cd1ca5f93ac01b38b183774468fc79 /test/CodeGen
parent | 85d8b2bbe386bcfe669575d05b61482d7be07e5d (diff)
download | src-3a0822f094b578157263e04114075ad7df81db41.tar.gz src-3a0822f094b578157263e04114075ad7df81db41.zip
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/AArch64/arm64-atomic.ll | 16
-rw-r--r-- | test/CodeGen/AArch64/arm64-big-endian-eh.ll | 8
-rw-r--r-- | test/CodeGen/AArch64/arm64-ccmp.ll | 40
-rw-r--r-- | test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll | 8
-rw-r--r-- | test/CodeGen/AArch64/arm64-neon-2velem-high.ll | 457
-rw-r--r-- | test/CodeGen/AArch64/arm64-stp.ll | 32
-rw-r--r-- | test/CodeGen/AArch64/arm64-strict-align.ll | 1
-rw-r--r-- | test/CodeGen/AArch64/br-to-eh-lpad.ll | 6
-rw-r--r-- | test/CodeGen/AArch64/ifcvt-select.ll | 41
-rw-r--r-- | test/CodeGen/AArch64/pic-eh-stubs.ll | 4
-rw-r--r-- | test/CodeGen/AArch64/simple-macho.ll | 12
-rw-r--r-- | test/CodeGen/AMDGPU/32-bit-local-address-space.ll (renamed from test/CodeGen/R600/32-bit-local-address-space.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/README (renamed from test/CodeGen/R600/README) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/add-debug.ll (renamed from test/CodeGen/R600/add-debug.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/add.ll (renamed from test/CodeGen/R600/add.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/add_i64.ll (renamed from test/CodeGen/R600/add_i64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/address-space.ll (renamed from test/CodeGen/R600/address-space.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/and.ll (renamed from test/CodeGen/R600/and.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/anyext.ll (renamed from test/CodeGen/R600/anyext.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/array-ptr-calc-i32.ll (renamed from test/CodeGen/R600/array-ptr-calc-i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/array-ptr-calc-i64.ll (renamed from test/CodeGen/R600/array-ptr-calc-i64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll (renamed from test/CodeGen/R600/atomic_cmp_swap_local.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/atomic_load_add.ll (renamed from test/CodeGen/R600/atomic_load_add.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/atomic_load_sub.ll (renamed from test/CodeGen/R600/atomic_load_sub.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/basic-branch.ll (renamed from test/CodeGen/R600/basic-branch.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/basic-loop.ll (renamed from test/CodeGen/R600/basic-loop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/bfe_uint.ll (renamed from test/CodeGen/R600/bfe_uint.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/bfi_int.ll (renamed from test/CodeGen/R600/bfi_int.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/big_alu.ll (renamed from test/CodeGen/R600/big_alu.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/bitcast.ll (renamed from test/CodeGen/R600/bitcast.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/bswap.ll (renamed from test/CodeGen/R600/bswap.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/build_vector.ll (renamed from test/CodeGen/R600/build_vector.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/call.ll (renamed from test/CodeGen/R600/call.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/call_fs.ll (renamed from test/CodeGen/R600/call_fs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cayman-loop-bug.ll (renamed from test/CodeGen/R600/cayman-loop-bug.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cf-stack-bug.ll (renamed from test/CodeGen/R600/cf-stack-bug.ll) | 35
-rw-r--r-- | test/CodeGen/AMDGPU/cf_end.ll (renamed from test/CodeGen/R600/cf_end.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cgp-addressing-modes.ll (renamed from test/CodeGen/R600/cgp-addressing-modes.ll) | 2
-rw-r--r-- | test/CodeGen/AMDGPU/coalescer_remat.ll (renamed from test/CodeGen/R600/coalescer_remat.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll (renamed from test/CodeGen/R600/codegen-prepare-addrmode-sext.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/combine_vloads.ll (renamed from test/CodeGen/R600/combine_vloads.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/commute-compares.ll (renamed from test/CodeGen/R600/commute-compares.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/commute_modifiers.ll (renamed from test/CodeGen/R600/commute_modifiers.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/complex-folding.ll (renamed from test/CodeGen/R600/complex-folding.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/concat_vectors.ll (renamed from test/CodeGen/R600/concat_vectors.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/copy-illegal-type.ll (renamed from test/CodeGen/R600/copy-illegal-type.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/copy-to-reg.ll (renamed from test/CodeGen/R600/copy-to-reg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ctlz_zero_undef.ll (renamed from test/CodeGen/R600/ctlz_zero_undef.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ctpop.ll (renamed from test/CodeGen/R600/ctpop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ctpop64.ll (renamed from test/CodeGen/R600/ctpop64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cttz_zero_undef.ll (renamed from test/CodeGen/R600/cttz_zero_undef.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (renamed from test/CodeGen/R600/cvt_f32_ubyte.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll (renamed from test/CodeGen/R600/cvt_flr_i32_f32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll (renamed from test/CodeGen/R600/cvt_rpi_i32_f32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll (renamed from test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/debug.ll (renamed from test/CodeGen/R600/debug.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/default-fp-mode.ll (renamed from test/CodeGen/R600/default-fp-mode.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll (renamed from test/CodeGen/R600/disconnected-predset-break-bug.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/dot4-folding.ll (renamed from test/CodeGen/R600/dot4-folding.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll (renamed from test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds_read2.ll (renamed from test/CodeGen/R600/ds_read2.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds_read2_offset_order.ll (renamed from test/CodeGen/R600/ds_read2_offset_order.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds_read2st64.ll (renamed from test/CodeGen/R600/ds_read2st64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds_write2.ll (renamed from test/CodeGen/R600/ds_write2.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ds_write2st64.ll (renamed from test/CodeGen/R600/ds_write2st64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/elf.ll (renamed from test/CodeGen/R600/elf.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/elf.r600.ll (renamed from test/CodeGen/R600/elf.r600.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/empty-function.ll (renamed from test/CodeGen/R600/empty-function.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/endcf-loop-header.ll (renamed from test/CodeGen/R600/endcf-loop-header.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/extload-private.ll (renamed from test/CodeGen/R600/extload-private.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/extload.ll (renamed from test/CodeGen/R600/extload.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/extract_vector_elt_i16.ll (renamed from test/CodeGen/R600/extract_vector_elt_i16.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fabs.f64.ll (renamed from test/CodeGen/R600/fabs.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fabs.ll (renamed from test/CodeGen/R600/fabs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fadd.ll (renamed from test/CodeGen/R600/fadd.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fadd64.ll (renamed from test/CodeGen/R600/fadd64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fceil.ll (renamed from test/CodeGen/R600/fceil.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fceil64.ll (renamed from test/CodeGen/R600/fceil64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcmp-cnd.ll (renamed from test/CodeGen/R600/fcmp-cnd.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll (renamed from test/CodeGen/R600/fcmp-cnde-int-args.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcmp.ll (renamed from test/CodeGen/R600/fcmp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcmp64.ll (renamed from test/CodeGen/R600/fcmp64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fconst64.ll (renamed from test/CodeGen/R600/fconst64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcopysign.f32.ll (renamed from test/CodeGen/R600/fcopysign.f32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fcopysign.f64.ll (renamed from test/CodeGen/R600/fcopysign.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fdiv.f64.ll (renamed from test/CodeGen/R600/fdiv.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fdiv.ll (renamed from test/CodeGen/R600/fdiv.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fetch-limits.r600.ll (renamed from test/CodeGen/R600/fetch-limits.r600.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fetch-limits.r700+.ll (renamed from test/CodeGen/R600/fetch-limits.r700+.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ffloor.f64.ll (renamed from test/CodeGen/R600/ffloor.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ffloor.ll (renamed from test/CodeGen/R600/ffloor.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/flat-address-space.ll (renamed from test/CodeGen/R600/flat-address-space.ll) | 2
-rw-r--r-- | test/CodeGen/AMDGPU/floor.ll (renamed from test/CodeGen/R600/floor.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fma-combine.ll (renamed from test/CodeGen/R600/fma-combine.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fma.f64.ll (renamed from test/CodeGen/R600/fma.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fma.ll (renamed from test/CodeGen/R600/fma.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmad.ll (renamed from test/CodeGen/R600/fmad.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmax.ll (renamed from test/CodeGen/R600/fmax.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmax3.f64.ll (renamed from test/CodeGen/R600/fmax3.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmax3.ll (renamed from test/CodeGen/R600/fmax3.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmax_legacy.f64.ll (renamed from test/CodeGen/R600/fmax_legacy.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmax_legacy.ll (renamed from test/CodeGen/R600/fmax_legacy.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmaxnum.f64.ll (renamed from test/CodeGen/R600/fmaxnum.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmaxnum.ll (renamed from test/CodeGen/R600/fmaxnum.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmin.ll (renamed from test/CodeGen/R600/fmin.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmin3.ll (renamed from test/CodeGen/R600/fmin3.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmin_legacy.f64.ll (renamed from test/CodeGen/R600/fmin_legacy.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmin_legacy.ll (renamed from test/CodeGen/R600/fmin_legacy.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fminnum.f64.ll (renamed from test/CodeGen/R600/fminnum.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fminnum.ll (renamed from test/CodeGen/R600/fminnum.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmul.ll (renamed from test/CodeGen/R600/fmul.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmul64.ll (renamed from test/CodeGen/R600/fmul64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fmuladd.ll (renamed from test/CodeGen/R600/fmuladd.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fnearbyint.ll (renamed from test/CodeGen/R600/fnearbyint.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fneg-fabs.f64.ll (renamed from test/CodeGen/R600/fneg-fabs.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fneg-fabs.ll (renamed from test/CodeGen/R600/fneg-fabs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fneg.f64.ll (renamed from test/CodeGen/R600/fneg.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fneg.ll (renamed from test/CodeGen/R600/fneg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp-classify.ll (renamed from test/CodeGen/R600/fp-classify.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp16_to_fp.ll (renamed from test/CodeGen/R600/fp16_to_fp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp32_to_fp16.ll (renamed from test/CodeGen/R600/fp32_to_fp16.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp_to_sint.f64.ll (renamed from test/CodeGen/R600/fp_to_sint.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp_to_sint.ll (renamed from test/CodeGen/R600/fp_to_sint.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp_to_uint.f64.ll (renamed from test/CodeGen/R600/fp_to_uint.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fp_to_uint.ll (renamed from test/CodeGen/R600/fp_to_uint.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fpext.ll (renamed from test/CodeGen/R600/fpext.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fptrunc.ll (renamed from test/CodeGen/R600/fptrunc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/frem.ll (renamed from test/CodeGen/R600/frem.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fsqrt.ll (renamed from test/CodeGen/R600/fsqrt.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fsub.ll (renamed from test/CodeGen/R600/fsub.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/fsub64.ll (renamed from test/CodeGen/R600/fsub64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ftrunc.f64.ll (renamed from test/CodeGen/R600/ftrunc.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ftrunc.ll (renamed from test/CodeGen/R600/ftrunc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/gep-address-space.ll (renamed from test/CodeGen/R600/gep-address-space.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-directive.ll (renamed from test/CodeGen/R600/global-directive.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-extload-i1.ll (renamed from test/CodeGen/R600/global-extload-i1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-extload-i16.ll (renamed from test/CodeGen/R600/global-extload-i16.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-extload-i32.ll (renamed from test/CodeGen/R600/global-extload-i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-extload-i8.ll (renamed from test/CodeGen/R600/global-extload-i8.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global-zero-initializer.ll (renamed from test/CodeGen/R600/global-zero-initializer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/global_atomics.ll (renamed from test/CodeGen/R600/global_atomics.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll (renamed from test/CodeGen/R600/gv-const-addrspace-fail.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/gv-const-addrspace.ll (renamed from test/CodeGen/R600/gv-const-addrspace.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/half.ll (renamed from test/CodeGen/R600/half.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/hsa.ll (renamed from test/CodeGen/R600/hsa.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/i1-copy-implicit-def.ll (renamed from test/CodeGen/R600/i1-copy-implicit-def.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/i1-copy-phi.ll (renamed from test/CodeGen/R600/i1-copy-phi.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/i8-to-double-to-float.ll (renamed from test/CodeGen/R600/i8-to-double-to-float.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll (renamed from test/CodeGen/R600/icmp-select-sete-reverse-args.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/icmp64.ll (renamed from test/CodeGen/R600/icmp64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/imm.ll (renamed from test/CodeGen/R600/imm.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/indirect-addressing-si.ll (renamed from test/CodeGen/R600/indirect-addressing-si.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/indirect-private-64.ll (renamed from test/CodeGen/R600/indirect-private-64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/infinite-loop-evergreen.ll (renamed from test/CodeGen/R600/infinite-loop-evergreen.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/infinite-loop.ll (renamed from test/CodeGen/R600/infinite-loop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/inline-asm.ll (renamed from test/CodeGen/R600/inline-asm.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/inline-calls.ll (renamed from test/CodeGen/R600/inline-calls.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/input-mods.ll (renamed from test/CodeGen/R600/input-mods.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/insert_subreg.ll (renamed from test/CodeGen/R600/insert_subreg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/insert_vector_elt.ll (renamed from test/CodeGen/R600/insert_vector_elt.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/jump-address.ll (renamed from test/CodeGen/R600/jump-address.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/kcache-fold.ll (renamed from test/CodeGen/R600/kcache-fold.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/kernel-args.ll (renamed from test/CodeGen/R600/kernel-args.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/large-alloca.ll (renamed from test/CodeGen/R600/large-alloca.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/large-constant-initializer.ll (renamed from test/CodeGen/R600/large-constant-initializer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lds-initializer.ll (renamed from test/CodeGen/R600/lds-initializer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lds-oqap-crash.ll (renamed from test/CodeGen/R600/lds-oqap-crash.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lds-output-queue.ll (renamed from test/CodeGen/R600/lds-output-queue.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lds-size.ll (renamed from test/CodeGen/R600/lds-size.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lds-zero-initializer.ll (renamed from test/CodeGen/R600/lds-zero-initializer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll (renamed from test/CodeGen/R600/legalizedag-bug-expand-setcc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lit.local.cfg | 2
-rw-r--r-- | test/CodeGen/AMDGPU/literals.ll (renamed from test/CodeGen/R600/literals.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.abs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.global.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.local.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.u32.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.bfi.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.bfi.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.bfm.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.bfm.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.brev.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.clamp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.class.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.cube.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.cvt_f32_ubyte.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.div_scale.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.fract.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.fract.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.imad24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.imax.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.imax.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.imin.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.imin.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.imul24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.kill.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.ldexp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.mul.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.rcp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.rsq.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.tex.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.tex.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.trunc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.umad24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.umax.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.umax.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.umin.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.umin.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll (renamed from test/CodeGen/R600/llvm.AMDGPU.umul24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll (renamed from test/CodeGen/R600/llvm.SI.fs.interp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.gather4.ll (renamed from test/CodeGen/R600/llvm.SI.gather4.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.getlod.ll (renamed from test/CodeGen/R600/llvm.SI.getlod.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.image.ll (renamed from test/CodeGen/R600/llvm.SI.image.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.image.sample.ll (renamed from test/CodeGen/R600/llvm.SI.image.sample.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll (renamed from test/CodeGen/R600/llvm.SI.image.sample.o.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.imageload.ll (renamed from test/CodeGen/R600/llvm.SI.imageload.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.load.dword.ll (renamed from test/CodeGen/R600/llvm.SI.load.dword.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.resinfo.ll (renamed from test/CodeGen/R600/llvm.SI.resinfo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll (renamed from test/CodeGen/R600/llvm.SI.sample-masked.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.sample.ll (renamed from test/CodeGen/R600/llvm.SI.sample.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.sampled.ll (renamed from test/CodeGen/R600/llvm.SI.sampled.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll (renamed from test/CodeGen/R600/llvm.SI.sendmsg-m0.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll (renamed from test/CodeGen/R600/llvm.SI.sendmsg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll (renamed from test/CodeGen/R600/llvm.SI.tbuffer.store.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.SI.tid.ll (renamed from test/CodeGen/R600/llvm.SI.tid.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgpu.dp4.ll (renamed from test/CodeGen/R600/llvm.amdgpu.dp4.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll (renamed from test/CodeGen/R600/llvm.amdgpu.kilp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll (renamed from test/CodeGen/R600/llvm.amdgpu.lrp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.cos.ll (renamed from test/CodeGen/R600/llvm.cos.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.exp2.ll (renamed from test/CodeGen/R600/llvm.exp2.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.log2.ll (renamed from test/CodeGen/R600/llvm.log2.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.memcpy.ll (renamed from test/CodeGen/R600/llvm.memcpy.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.pow.ll (renamed from test/CodeGen/R600/llvm.pow.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.rint.f64.ll (renamed from test/CodeGen/R600/llvm.rint.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.rint.ll (renamed from test/CodeGen/R600/llvm.rint.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.round.f64.ll (renamed from test/CodeGen/R600/llvm.round.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.round.ll (renamed from test/CodeGen/R600/llvm.round.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.sin.ll (renamed from test/CodeGen/R600/llvm.sin.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.sqrt.ll (renamed from test/CodeGen/R600/llvm.sqrt.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/load-i1.ll (renamed from test/CodeGen/R600/load-i1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/load-input-fold.ll (renamed from test/CodeGen/R600/load-input-fold.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/load.ll (renamed from test/CodeGen/R600/load.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/load.vec.ll (renamed from test/CodeGen/R600/load.vec.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/load64.ll (renamed from test/CodeGen/R600/load64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/local-64.ll (renamed from test/CodeGen/R600/local-64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/local-atomics.ll (renamed from test/CodeGen/R600/local-atomics.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/local-atomics64.ll (renamed from test/CodeGen/R600/local-atomics64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/local-memory-two-objects.ll (renamed from test/CodeGen/R600/local-memory-two-objects.ll) | 2
-rw-r--r-- | test/CodeGen/AMDGPU/local-memory.ll (renamed from test/CodeGen/R600/local-memory.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/loop-address.ll (renamed from test/CodeGen/R600/loop-address.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/loop-idiom.ll (renamed from test/CodeGen/R600/loop-idiom.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lshl.ll (renamed from test/CodeGen/R600/lshl.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/lshr.ll (renamed from test/CodeGen/R600/lshr.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/m0-spill.ll (renamed from test/CodeGen/R600/m0-spill.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mad-combine.ll (renamed from test/CodeGen/R600/mad-combine.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mad-sub.ll (renamed from test/CodeGen/R600/mad-sub.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mad_int24.ll (renamed from test/CodeGen/R600/mad_int24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mad_uint24.ll (renamed from test/CodeGen/R600/mad_uint24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/madak.ll (renamed from test/CodeGen/R600/madak.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/madmk.ll (renamed from test/CodeGen/R600/madmk.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/max-literals.ll (renamed from test/CodeGen/R600/max-literals.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/max.ll (renamed from test/CodeGen/R600/max.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/max3.ll (renamed from test/CodeGen/R600/max3.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/merge-stores.ll (renamed from test/CodeGen/R600/merge-stores.ll) | 101
-rw-r--r-- | test/CodeGen/AMDGPU/min.ll (renamed from test/CodeGen/R600/min.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/min3.ll (renamed from test/CodeGen/R600/min3.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/missing-store.ll (renamed from test/CodeGen/R600/missing-store.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mubuf.ll (renamed from test/CodeGen/R600/mubuf.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mul.ll (renamed from test/CodeGen/R600/mul.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mul_int24.ll (renamed from test/CodeGen/R600/mul_int24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mul_uint24.ll (renamed from test/CodeGen/R600/mul_uint24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/mulhu.ll (renamed from test/CodeGen/R600/mulhu.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll (renamed from test/CodeGen/R600/no-initializer-constant-addrspace.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/no-shrink-extloads.ll (renamed from test/CodeGen/R600/no-shrink-extloads.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/operand-folding.ll (renamed from test/CodeGen/R600/operand-folding.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/operand-spacing.ll (renamed from test/CodeGen/R600/operand-spacing.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/or.ll (renamed from test/CodeGen/R600/or.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/packetizer.ll (renamed from test/CodeGen/R600/packetizer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/parallelandifcollapse.ll (renamed from test/CodeGen/R600/parallelandifcollapse.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/parallelorifcollapse.ll (renamed from test/CodeGen/R600/parallelorifcollapse.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/predicate-dp4.ll (renamed from test/CodeGen/R600/predicate-dp4.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/predicates.ll (renamed from test/CodeGen/R600/predicates.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/private-memory-atomics.ll (renamed from test/CodeGen/R600/private-memory-atomics.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/private-memory-broken.ll (renamed from test/CodeGen/R600/private-memory-broken.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/private-memory.ll (renamed from test/CodeGen/R600/private-memory.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/pv-packing.ll (renamed from test/CodeGen/R600/pv-packing.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/pv.ll (renamed from test/CodeGen/R600/pv.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/r600-encoding.ll (renamed from test/CodeGen/R600/r600-encoding.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/r600-export-fix.ll (renamed from test/CodeGen/R600/r600-export-fix.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll (renamed from test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/r600cfg.ll (renamed from test/CodeGen/R600/r600cfg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/reciprocal.ll (renamed from test/CodeGen/R600/reciprocal.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/register-count-comments.ll (renamed from test/CodeGen/R600/register-count-comments.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/reorder-stores.ll (renamed from test/CodeGen/R600/reorder-stores.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rotl.i64.ll (renamed from test/CodeGen/R600/rotl.i64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rotl.ll (renamed from test/CodeGen/R600/rotl.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rotr.i64.ll (renamed from test/CodeGen/R600/rotr.i64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rotr.ll (renamed from test/CodeGen/R600/rotr.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rsq.ll (renamed from test/CodeGen/R600/rsq.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/rv7x0_count3.ll (renamed from test/CodeGen/R600/rv7x0_count3.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/s_movk_i32.ll (renamed from test/CodeGen/R600/s_movk_i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/saddo.ll (renamed from test/CodeGen/R600/saddo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/salu-to-valu.ll (renamed from test/CodeGen/R600/salu-to-valu.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/scalar_to_vector.ll (renamed from test/CodeGen/R600/scalar_to_vector.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll (renamed from test/CodeGen/R600/schedule-fs-loop-nested-if.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll (renamed from test/CodeGen/R600/schedule-fs-loop-nested.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-fs-loop.ll (renamed from test/CodeGen/R600/schedule-fs-loop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-global-loads.ll (renamed from test/CodeGen/R600/schedule-global-loads.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-if-2.ll (renamed from test/CodeGen/R600/schedule-if-2.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-if.ll (renamed from test/CodeGen/R600/schedule-if.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll (renamed from test/CodeGen/R600/schedule-kernel-arg-loads.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll (renamed from test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll (renamed from test/CodeGen/R600/schedule-vs-if-nested-loop.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/scratch-buffer.ll (renamed from test/CodeGen/R600/scratch-buffer.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sdiv.ll (renamed from test/CodeGen/R600/sdiv.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sdivrem24.ll (renamed from test/CodeGen/R600/sdivrem24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sdivrem64.ll (renamed from test/CodeGen/R600/sdivrem64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/select-i1.ll (renamed from test/CodeGen/R600/select-i1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/select-vectors.ll (renamed from test/CodeGen/R600/select-vectors.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/select.ll (renamed from test/CodeGen/R600/select.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/select64.ll (renamed from test/CodeGen/R600/select64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/selectcc-cnd.ll (renamed from test/CodeGen/R600/selectcc-cnd.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/selectcc-cnde-int.ll (renamed from test/CodeGen/R600/selectcc-cnde-int.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll (renamed from test/CodeGen/R600/selectcc-icmp-select-float.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/selectcc-opt.ll (renamed from test/CodeGen/R600/selectcc-opt.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/selectcc.ll (renamed from test/CodeGen/R600/selectcc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/set-dx10.ll (renamed from test/CodeGen/R600/set-dx10.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/setcc-equivalent.ll (renamed from test/CodeGen/R600/setcc-equivalent.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/setcc-opt.ll (renamed from test/CodeGen/R600/setcc-opt.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/setcc.ll (renamed from test/CodeGen/R600/setcc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/setcc64.ll (renamed from test/CodeGen/R600/setcc64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/seto.ll (renamed from test/CodeGen/R600/seto.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/setuo.ll (renamed from test/CodeGen/R600/setuo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sext-eliminate.ll (renamed from test/CodeGen/R600/sext-eliminate.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sext-in-reg.ll (renamed from test/CodeGen/R600/sext-in-reg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sgpr-control-flow.ll (renamed from test/CodeGen/R600/sgpr-control-flow.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll (renamed from test/CodeGen/R600/sgpr-copy-duplicate-operand.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sgpr-copy.ll (renamed from test/CodeGen/R600/sgpr-copy.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/shared-op-cycle.ll (renamed from test/CodeGen/R600/shared-op-cycle.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/shl.ll (renamed from test/CodeGen/R600/shl.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/shl_add_constant.ll (renamed from test/CodeGen/R600/shl_add_constant.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/shl_add_ptr.ll (renamed from test/CodeGen/R600/shl_add_ptr.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-annotate-cf-assertion.ll (renamed from test/CodeGen/R600/si-annotate-cf-assertion.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-annotate-cf.ll (renamed from test/CodeGen/R600/si-annotate-cf.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-lod-bias.ll (renamed from test/CodeGen/R600/si-lod-bias.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-sgpr-spill.ll (renamed from test/CodeGen/R600/si-sgpr-spill.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-spill-cf.ll (renamed from test/CodeGen/R600/si-spill-cf.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (renamed from test/CodeGen/R600/si-triv-disjoint-mem-access.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/si-vector-hang.ll (renamed from test/CodeGen/R600/si-vector-hang.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sign_extend.ll (renamed from test/CodeGen/R600/sign_extend.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/simplify-demanded-bits-build-pair.ll (renamed from test/CodeGen/R600/simplify-demanded-bits-build-pair.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sint_to_fp.f64.ll (renamed from test/CodeGen/R600/sint_to_fp.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sint_to_fp.ll (renamed from test/CodeGen/R600/sint_to_fp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/smrd.ll (renamed from test/CodeGen/R600/smrd.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/split-scalar-i64-add.ll (renamed from test/CodeGen/R600/split-scalar-i64-add.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sra.ll (renamed from test/CodeGen/R600/sra.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/srem.ll (renamed from test/CodeGen/R600/srem.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/srl.ll (renamed from test/CodeGen/R600/srl.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/ssubo.ll (renamed from test/CodeGen/R600/ssubo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store-barrier.ll (renamed from test/CodeGen/R600/store-barrier.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store-v3i32.ll (renamed from test/CodeGen/R600/store-v3i32.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store-v3i64.ll (renamed from test/CodeGen/R600/store-v3i64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store-vector-ptrs.ll (renamed from test/CodeGen/R600/store-vector-ptrs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store.ll (renamed from test/CodeGen/R600/store.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/store.r600.ll (renamed from test/CodeGen/R600/store.r600.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/structurize.ll (renamed from test/CodeGen/R600/structurize.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/structurize1.ll (renamed from test/CodeGen/R600/structurize1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/sub.ll (renamed from test/CodeGen/R600/sub.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/subreg-coalescer-crash.ll (renamed from test/CodeGen/R600/subreg-coalescer-crash.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/subreg-eliminate-dead.ll (renamed from test/CodeGen/R600/subreg-eliminate-dead.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/swizzle-export.ll (renamed from test/CodeGen/R600/swizzle-export.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/tex-clause-antidep.ll (renamed from test/CodeGen/R600/tex-clause-antidep.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/texture-input-merge.ll (renamed from test/CodeGen/R600/texture-input-merge.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/trunc-cmp-constant.ll (renamed from test/CodeGen/R600/trunc-cmp-constant.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll (renamed from test/CodeGen/R600/trunc-store-f64-to-f16.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/trunc-store-i1.ll (renamed from test/CodeGen/R600/trunc-store-i1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll (renamed from test/CodeGen/R600/trunc-vector-store-assertion-failure.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/trunc.ll (renamed from test/CodeGen/R600/trunc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/tti-unroll-prefs.ll (renamed from test/CodeGen/R600/tti-unroll-prefs.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/uaddo.ll (renamed from test/CodeGen/R600/uaddo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/udiv.ll (renamed from test/CodeGen/R600/udiv.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/udivrem.ll (renamed from test/CodeGen/R600/udivrem.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/udivrem24.ll (renamed from test/CodeGen/R600/udivrem24.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/udivrem64.ll (renamed from test/CodeGen/R600/udivrem64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/uint_to_fp.f64.ll (renamed from test/CodeGen/R600/uint_to_fp.f64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/uint_to_fp.ll (renamed from test/CodeGen/R600/uint_to_fp.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/unaligned-load-store.ll (renamed from test/CodeGen/R600/unaligned-load-store.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll (renamed from test/CodeGen/R600/unhandled-loop-condition-assertion.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/unroll.ll (renamed from test/CodeGen/R600/unroll.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/unsupported-cc.ll (renamed from test/CodeGen/R600/unsupported-cc.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/urecip.ll (renamed from test/CodeGen/R600/urecip.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/urem.ll (renamed from test/CodeGen/R600/urem.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll (renamed from test/CodeGen/R600/use-sgpr-multiple-times.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/usubo.ll (renamed from test/CodeGen/R600/usubo.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/v1i64-kernel-arg.ll (renamed from test/CodeGen/R600/v1i64-kernel-arg.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/v_cndmask.ll (renamed from test/CodeGen/R600/v_cndmask.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/valu-i1.ll (renamed from test/CodeGen/R600/valu-i1.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vector-alloca.ll (renamed from test/CodeGen/R600/vector-alloca.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vertex-fetch-encoding.ll (renamed from test/CodeGen/R600/vertex-fetch-encoding.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vop-shrink.ll (renamed from test/CodeGen/R600/vop-shrink.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vselect.ll (renamed from test/CodeGen/R600/vselect.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vselect64.ll (renamed from test/CodeGen/R600/vselect64.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vtx-fetch-branch.ll (renamed from test/CodeGen/R600/vtx-fetch-branch.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/vtx-schedule.ll (renamed from test/CodeGen/R600/vtx-schedule.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/wait.ll (renamed from test/CodeGen/R600/wait.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/work-item-intrinsics.ll (renamed from test/CodeGen/R600/work-item-intrinsics.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll (renamed from test/CodeGen/R600/wrong-transalu-pos-fix.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/xor.ll (renamed from test/CodeGen/R600/xor.ll) | 0
-rw-r--r-- | test/CodeGen/AMDGPU/zero_extend.ll (renamed from test/CodeGen/R600/zero_extend.ll) | 0
-rw-r--r-- | test/CodeGen/ARM/2009-08-31-LSDA-Name.ll | 4
-rw-r--r-- | test/CodeGen/ARM/2010-07-26-GlobalMerge.ll | 4
-rw-r--r-- | test/CodeGen/ARM/2010-08-04-EHCrash.ll | 4
-rw-r--r-- | test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll | 10
-rw-r--r-- | test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll | 4
-rw-r--r-- | test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll | 8
-rw-r--r-- | test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll | 4
-rw-r--r-- | test/CodeGen/ARM/arm-ttype-target2.ll | 4
-rw-r--r-- | test/CodeGen/ARM/big-endian-eh-unwind.ll | 8
-rw-r--r-- | test/CodeGen/ARM/build-attributes.ll | 6
-rw-r--r-- | test/CodeGen/ARM/crash.ll | 4
-rw-r--r-- | test/CodeGen/ARM/debug-frame-no-debug.ll | 5
-rw-r--r-- | test/CodeGen/ARM/debug-frame-vararg.ll | 2
-rw-r--r-- | test/CodeGen/ARM/debug-frame.ll | 5
-rw-r--r-- | test/CodeGen/ARM/disable-tail-calls.ll | 40
-rw-r--r-- | test/CodeGen/ARM/dwarf-eh.ll | 4
-rw-r--r-- | test/CodeGen/ARM/eh-dispcont.ll | 4
-rw-r--r-- | test/CodeGen/ARM/eh-resume-darwin.ll | 4
-rw-r--r-- | test/CodeGen/ARM/ehabi-filters.ll | 6
-rw-r--r-- | test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll | 4
-rw-r--r-- | test/CodeGen/ARM/ehabi-handlerdata.ll | 4
-rw-r--r-- | test/CodeGen/ARM/ehabi.ll | 5
-rw-r--r-- | test/CodeGen/ARM/global-merge.ll | 4
-rw-r--r-- | test/CodeGen/ARM/gv-stubs-crash.ll | 4
-rw-r--r-- | test/CodeGen/ARM/invoke-donothing-assert.ll | 10
-rw-r--r-- | test/CodeGen/ARM/sjlj-prepare-critical-edge.ll | 18
-rw-r--r-- | test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll | 4
-rw-r--r-- | test/CodeGen/ARM/vtrn.ll | 265
-rw-r--r-- | test/CodeGen/ARM/vuzp.ll | 212
-rw-r--r-- | test/CodeGen/ARM/vzip.ll | 212
-rw-r--r-- | test/CodeGen/Generic/2007-02-25-invoke.ll | 4
-rw-r--r-- | test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll | 4
-rw-r--r-- | test/CodeGen/Generic/2007-12-17-InvokeAsm.ll | 4
-rw-r--r-- | test/CodeGen/Generic/2007-12-31-UnusedSelector.ll | 6
-rw-r--r-- | test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll | 6
-rw-r--r-- | test/CodeGen/Generic/donothing.ll | 4
-rw-r--r-- | test/CodeGen/Generic/exception-handling.ll | 4
-rw-r--r-- | test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll | 4
-rw-r--r-- | test/CodeGen/Hexagon/absaddr-store.ll | 29
-rw-r--r-- | test/CodeGen/Hexagon/absimm.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/addh-sext-trunc.ll | 43
-rw-r--r-- | test/CodeGen/Hexagon/addh-shifted.ll | 21
-rw-r--r-- | test/CodeGen/Hexagon/addh.ll | 21
-rw-r--r-- | test/CodeGen/Hexagon/addrmode-indoff.ll | 74
-rw-r--r-- | test/CodeGen/Hexagon/always-ext.ll | 5
-rw-r--r-- | test/CodeGen/Hexagon/args.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/ashift-left-right.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/barrier-flag.ll | 125
-rw-r--r-- | test/CodeGen/Hexagon/base-offset-addr.ll | 15
-rw-r--r-- | test/CodeGen/Hexagon/base-offset-post.ll | 30
-rw-r--r-- | test/CodeGen/Hexagon/bugAsmHWloop.ll | 71
-rw-r--r-- | test/CodeGen/Hexagon/cext-valid-packet1.ll | 3
-rw-r--r-- | test/CodeGen/Hexagon/cext-valid-packet2.ll | 38
-rw-r--r-- | test/CodeGen/Hexagon/cext.ll | 16
-rw-r--r-- | test/CodeGen/Hexagon/cexti16.ll | 16
-rw-r--r-- | test/CodeGen/Hexagon/checktabs.ll | 8
-rw-r--r-- | test/CodeGen/Hexagon/cmp-extend.ll | 40
-rw-r--r-- | test/CodeGen/Hexagon/cmp-promote.ll | 72
-rw-r--r-- | test/CodeGen/Hexagon/cmp-to-genreg.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/cmp-to-predreg.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/cmp.ll | 161
-rw-r--r-- | test/CodeGen/Hexagon/cmp_pred.ll | 3
-rw-r--r-- | test/CodeGen/Hexagon/cmp_pred_reg.ll | 3
-rw-r--r-- | test/CodeGen/Hexagon/cmpb-eq.ll | 53
-rw-r--r-- | test/CodeGen/Hexagon/cmpb_pred.ll | 3
-rw-r--r-- | test/CodeGen/Hexagon/eh_return.ll | 48
-rw-r--r-- | test/CodeGen/Hexagon/hwloop-lt.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/hwloop-lt1.ll | 2
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/alu32_alu.ll | 63
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/alu32_perm.ll | 31
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/cr.ll | 39
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_alu.ll | 282
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_bit.ll | 93
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_complex.ll | 99
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_fp.ll | 110
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll | 434
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_perm.ll | 71
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_pred.ll | 99
-rw-r--r-- | test/CodeGen/Hexagon/intrinsics/xtype_shift.ll | 205
-rw-r--r-- | test/CodeGen/Hexagon/loadi1-G0.ll | 43
-rw-r--r-- | test/CodeGen/Hexagon/loadi1-v4-G0.ll | 43
-rw-r--r-- | test/CodeGen/Hexagon/loadi1-v4.ll | 45
-rw-r--r-- | test/CodeGen/Hexagon/loadi1.ll | 45
-rw-r--r-- | test/CodeGen/Hexagon/maxd.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/maxh.ll | 23
-rw-r--r-- | test/CodeGen/Hexagon/maxud.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/maxuw.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/maxw.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/mind.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/minu-zext-16.ll | 11
-rw-r--r-- | test/CodeGen/Hexagon/minu-zext-8.ll | 11
-rw-r--r-- | test/CodeGen/Hexagon/minud.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/minuw.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/minw.ll | 9
-rw-r--r-- | test/CodeGen/Hexagon/postinc-offset.ll | 40
-rw-r--r-- | test/CodeGen/Hexagon/signed_immediates.ll | 99
-rw-r--r-- | test/CodeGen/Hexagon/simple_addend.ll | 10
-rw-r--r-- | test/CodeGen/Hexagon/usr-ovf-dep.ll | 28
-rw-r--r-- | test/CodeGen/MIR/basic-blocks.mir | 43
-rw-r--r-- | test/CodeGen/MIR/function-missing-machine-function.mir | 13
-rw-r--r-- | test/CodeGen/MIR/llvm-ir-error-reported.mir | 2
-rw-r--r-- | test/CodeGen/MIR/llvmIR.mir | 3
-rw-r--r-- | test/CodeGen/MIR/llvmIRMissing.mir | 4
-rw-r--r-- | test/CodeGen/MIR/machine-basic-block-unknown-name.mir | 18
-rw-r--r-- | test/CodeGen/MIR/machine-function-missing-function.mir | 19
-rw-r--r-- | test/CodeGen/MIR/machine-function-missing-name.mir | 2
-rw-r--r-- | test/CodeGen/MIR/machine-function-redefinition-error.mir | 10
-rw-r--r-- | test/CodeGen/MIR/machine-function.mir | 34
-rw-r--r-- | test/CodeGen/Mips/cconv/callee-saved.ll | 2
-rw-r--r-- | test/CodeGen/Mips/eh.ll | 4
-rw-r--r-- | test/CodeGen/Mips/ehframe-indirect.ll | 5
-rw-r--r-- | test/CodeGen/Mips/insn-zero-size-bb.ll | 4
-rw-r--r-- | test/CodeGen/Mips/mips16ex.ll | 6
-rw-r--r-- | test/CodeGen/NVPTX/access-non-generic.ll | 22
-rw-r--r-- | test/CodeGen/NVPTX/call-with-alloca-buffer.ll | 4
-rw-r--r-- | test/CodeGen/NVPTX/intrin-nocapture.ll | 2
-rw-r--r-- | test/CodeGen/NVPTX/lower-alloca.ll | 22
-rw-r--r-- | test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll | 4
-rw-r--r-- | test/CodeGen/PowerPC/builtins-ppc-p8vector.ll | 91
-rw-r--r-- | test/CodeGen/PowerPC/extra-toc-reg-deps.ll | 32
-rw-r--r-- | test/CodeGen/PowerPC/fast-isel-icmp-split.ll | 4
-rw-r--r-- | test/CodeGen/PowerPC/glob-comp-aa-crash.ll | 6
-rw-r--r-- | test/CodeGen/PowerPC/hello-reloc.s | 82
-rw-r--r-- | test/CodeGen/PowerPC/mftb.ll | 72
-rw-r--r-- | test/CodeGen/PowerPC/pr18663-2.ll | 14
-rw-r--r-- | test/CodeGen/PowerPC/preincprep-invoke.ll | 4
-rw-r--r-- | test/CodeGen/R600/lit.local.cfg | 2
-rw-r--r-- | test/CodeGen/SPARC/exception.ll | 4
-rw-r--r-- | test/CodeGen/SPARC/obj-relocs.ll | 36
-rw-r--r-- | test/CodeGen/Thumb/sjljehprepare-lower-vector.ll | 4
-rw-r--r-- | test/CodeGen/Thumb2/constant-islands.ll | 46
-rw-r--r-- | test/CodeGen/WinEH/cppeh-alloca-sink.ll | 10
-rw-r--r-- | test/CodeGen/WinEH/cppeh-catch-all.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-catch-and-throw.ll | 8
-rw-r--r-- | test/CodeGen/WinEH/cppeh-catch-scalar.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-catch-unwind.ll | 20
-rw-r--r-- | test/CodeGen/WinEH/cppeh-cleanup-invoke.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-demote-liveout.ll | 4
-rw-r--r-- | test/CodeGen/WinEH/cppeh-frame-vars.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-inalloca.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-min-unwind.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll | 4
-rw-r--r-- | test/CodeGen/WinEH/cppeh-multi-catch.ll | 8
-rw-r--r-- | test/CodeGen/WinEH/cppeh-nested-1.ll | 10
-rw-r--r-- | test/CodeGen/WinEH/cppeh-nested-2.ll | 18
-rw-r--r-- | test/CodeGen/WinEH/cppeh-nested-3.ll | 14
-rw-r--r-- | test/CodeGen/WinEH/cppeh-nested-rethrow.ll | 12
-rw-r--r-- | test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/cppeh-prepared-catch-all.ll | 4
-rw-r--r-- | test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll | 8
-rw-r--r-- | test/CodeGen/WinEH/cppeh-prepared-catch.ll | 14
-rw-r--r-- | test/CodeGen/WinEH/cppeh-prepared-cleanups.ll | 18
-rw-r--r-- | test/CodeGen/WinEH/cppeh-shared-empty-catch.ll | 8
-rw-r--r-- | test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll | 24
-rw-r--r-- | test/CodeGen/WinEH/cppeh-state-calc-1.ll | 24
-rw-r--r-- | test/CodeGen/WinEH/seh-catch-all.ll | 4
-rw-r--r-- | test/CodeGen/WinEH/seh-inlined-finally.ll | 8
-rw-r--r-- | test/CodeGen/WinEH/seh-outlined-finally.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/seh-prepared-basic.ll | 4
-rw-r--r-- | test/CodeGen/WinEH/seh-resume-phi.ll | 6
-rw-r--r-- | test/CodeGen/WinEH/seh-simple.ll | 20
-rw-r--r-- | test/CodeGen/X86/2007-05-05-Personality.ll | 4
-rw-r--r-- | test/CodeGen/X86/2008-04-17-CoalescerBug.ll | 6
-rw-r--r-- | test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll | 4
-rw-r--r-- | test/CodeGen/X86/2009-03-13-PHIElimBug.ll | 4
-rw-r--r-- | test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll | 4
-rw-r--r-- | test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll | 4
-rw-r--r-- | test/CodeGen/X86/2009-11-25-ImpDefBug.ll | 4
-rw-r--r-- | test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll | 4
-rw-r--r-- | test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll | 8
-rw-r--r-- | test/CodeGen/X86/2010-08-04-MingWCrash.ll | 4
-rw-r--r-- | test/CodeGen/X86/2011-12-15-vec_shift.ll | 4
-rw-r--r-- | test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll | 4
-rw-r--r-- | test/CodeGen/X86/2012-05-19-CoalescerCrash.ll | 16
-rw-r--r-- | test/CodeGen/X86/2012-11-28-merge-store-alias.ll | 1
-rw-r--r-- | test/CodeGen/X86/2012-11-30-misched-dbg.ll | 4
-rw-r--r-- | test/CodeGen/X86/MergeConsecutiveStores.ll | 61
-rw-r--r-- | test/CodeGen/X86/asm-label2.ll | 4
-rw-r--r-- | test/CodeGen/X86/avx2-vector-shifts.ll | 416
-rw-r--r-- | test/CodeGen/X86/avx512-intrinsics.ll | 312
-rw-r--r-- | test/CodeGen/X86/avx512-shuffle.ll | 60
-rw-r--r-- | test/CodeGen/X86/avx512-vec-cmp.ll | 4
-rw-r--r-- | test/CodeGen/X86/avx512bw-intrinsics.ll | 130
-rw-r--r-- | test/CodeGen/X86/avx512bwvl-intrinsics.ll | 262
-rw-r--r-- | test/CodeGen/X86/avx512vl-intrinsics.ll | 210
-rw-r--r-- | test/CodeGen/X86/block-placement.ll | 8
-rw-r--r-- | test/CodeGen/X86/branchfolding-landingpads.ll | 6
-rw-r--r-- | test/CodeGen/X86/bswap-vector.ll | 177
-rw-r--r-- | test/CodeGen/X86/catch.ll | 4
-rw-r--r-- | test/CodeGen/X86/cfi.ll | 4
-rw-r--r-- | test/CodeGen/X86/code_placement_eh.ll | 6
-rw-r--r-- | test/CodeGen/X86/codegen-prepare-extload.ll | 8
-rw-r--r-- | test/CodeGen/X86/disable-tail-calls.ll | 40
-rw-r--r-- | test/CodeGen/X86/dllimport.ll | 4
-rw-r--r-- | test/CodeGen/X86/dwarf-eh-prepare.ll | 24
-rw-r--r-- | test/CodeGen/X86/eh-label.ll | 4
-rw-r--r-- | test/CodeGen/X86/exception-label.ll | 4
-rw-r--r-- | test/CodeGen/X86/fast-isel-cmp-branch.ll | 4
-rw-r--r-- | test/CodeGen/X86/fast-isel-gep.ll | 4
-rw-r--r-- | test/CodeGen/X86/fp-fast.ll | 78
-rw-r--r-- | test/CodeGen/X86/gcc_except_table.ll | 4
-rw-r--r-- | test/CodeGen/X86/gcc_except_table_functions.ll | 4
-rw-r--r-- | test/CodeGen/X86/global-fill.ll | 27
-rw-r--r-- | test/CodeGen/X86/global-sections.ll | 4
-rw-r--r-- | test/CodeGen/X86/implicit-null-check-negative.ll | 53
-rw-r--r-- | test/CodeGen/X86/implicit-null-check.ll | 118
-rw-r--r-- | test/CodeGen/X86/inalloca-invoke.ll | 4
-rw-r--r-- | test/CodeGen/X86/indirect-hidden.ll | 8
-rw-r--r-- | test/CodeGen/X86/large-gep-chain.ll | 10
-rw-r--r-- | test/CodeGen/X86/patchpoint-invoke.ll | 4
-rw-r--r-- | test/CodeGen/X86/personality.ll | 8
-rw-r--r-- | test/CodeGen/X86/personality_size.ll | 4
-rw-r--r-- | test/CodeGen/X86/pmul.ll | 37
-rw-r--r-- | test/CodeGen/X86/pr3522.ll | 4
-rw-r--r-- | test/CodeGen/X86/scev-interchange.ll | 44
-rw-r--r-- | test/CodeGen/X86/seh-catch-all-win32.ll | 85
-rw-r--r-- | test/CodeGen/X86/seh-catch-all.ll | 13
-rw-r--r-- | test/CodeGen/X86/seh-except-finally.ll | 6
-rw-r--r-- | test/CodeGen/X86/seh-filter.ll | 4
-rw-r--r-- | test/CodeGen/X86/seh-finally.ll | 47
-rw-r--r-- | test/CodeGen/X86/seh-safe-div-win32.ll | 172
-rw-r--r-- | test/CodeGen/X86/seh-safe-div.ll | 4
-rw-r--r-- | test/CodeGen/X86/setjmp-spills.ll | 4
-rw-r--r-- | test/CodeGen/X86/split-eh-lpad-edges.ll | 4
-rw-r--r-- | test/CodeGen/X86/stack-protector.ll | 32
-rw-r--r-- | test/CodeGen/X86/statepoint-invoke.ll | 22
-rw-r--r-- | test/CodeGen/X86/statepoint-stack-usage.ll | 48
-rw-r--r-- | test/CodeGen/X86/switch.ll | 115
-rw-r--r-- | test/CodeGen/X86/unaligned-32-byte-memops.ll | 342
-rw-r--r-- | test/CodeGen/X86/vec_int_to_fp.ll | 1098
-rw-r--r-- | test/CodeGen/X86/vec_shift8.ll | 1383
-rw-r--r-- | test/CodeGen/X86/vector-shuffle-512-v8.ll | 4
-rw-r--r-- | test/CodeGen/X86/win32-eh-states.ll | 8
-rw-r--r-- | test/CodeGen/X86/win32-eh.ll | 57
-rw-r--r-- | test/CodeGen/X86/win64_call_epi.ll | 4
-rw-r--r-- | test/CodeGen/X86/win64_eh.ll | 4
-rw-r--r-- | test/CodeGen/X86/win_eh_prepare.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/XCore/exception.ll | 4 |
650 files changed, 7835 insertions, 3386 deletions
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index 9136fb6271b5..0824bd881a95 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -14,6 +14,22 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
   ret i32 %val
 }
 
+define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
+; CHECK-LABEL: val_compare_and_swap_from_load:
+; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[RESULT]], w1
+; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK-NEXT: [[LABEL2]]:
+  %new = load i32, i32* %pnew
+  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
+  %val = extractvalue { i32, i1 } %pair, 0
+  ret i32 %val
+}
+
 define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap_rel:
 ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
diff --git a/test/CodeGen/AArch64/arm64-big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
index a51703a8fc4b..77d52e32d3a0 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-eh.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
@@ -14,13 +14,13 @@
 ; }
 ;}
 
-define void @_Z4testii(i32 %a, i32 %b) #0 {
+define void @_Z4testii(i32 %a, i32 %b) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z3fooi(i32 %a)
           to label %try.cont unwind label %lpad
 
 lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2
@@ -35,7 +35,7 @@ try.cont:                                         ; preds = %entry, %invoke.cont
   ret void
 
 lpad1:                                            ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %3 = landingpad { i8*, i32 }
           cleanup
   invoke void @__cxa_end_catch()
           to label %eh.resume unwind label %terminate.lpad
@@ -44,7 +44,7 @@ eh.resume:                                        ; preds = %lpad1
   resume { i8*, i32 } %3
 
 terminate.lpad:                                   ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %4 = landingpad { i8*, i32 }
           catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   tail call void @__clang_call_terminate(i8* %5) #3
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index 11228c7e8808..ff18f7364337 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -287,43 +287,3 @@ sw.bb.i.i:
   %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
   br label %sw.bb.i.i
 }
-
-; CHECK-LABEL: select_and
-define i64 @select_and(i32 %v1, i32 %v2, i64 %a, i64 %b) {
-; CHECK: cmp
-; CHECK: ccmp{{.*}}, #0, ne
-; CHECK: csel{{.*}}, lt
-  %1 = icmp slt i32 %v1, %v2
-  %2 = icmp ne i32 5, %v2
-  %3 = and i1 %1, %2
-  %sel = select i1 %3, i64 %a, i64 %b
-  ret i64 %sel
-}
-
-; CHECK-LABEL: select_or
-define i64 @select_or(i32 %v1, i32 %v2, i64 %a, i64 %b) {
-; CHECK: cmp
-; CHECK: ccmp{{.*}}, #8, eq
-; CHECK: csel{{.*}}, lt
-  %1 = icmp slt i32 %v1, %v2
-  %2 = icmp ne i32 5, %v2
-  %3 = or i1 %1, %2
-  %sel = select i1 %3, i64 %a, i64 %b
-  ret i64 %sel
-}
-
-; CHECK-LABEL: select_complicated
-define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
-; CHECK: fcmp
-; CHECK: fccmp{{.*}}, #4, ne
-; CHECK: fccmp{{.*}}, #1, ne
-; CHECK: fccmp{{.*}}, #4, vc
-; CEHCK: csel{{.*}}, eq
-  %1 = fcmp one double %v1, %v2
-  %2 = fcmp oeq double %v2, 13.0
-  %3 = fcmp oeq double %v1, 42.0
-  %or0 = or i1 %2, %3
-  %or1 = or i1 %1, %or0
-  %sel = select i1 %or1, i16 %a, i16 %b
-  ret i16 %sel
-}
diff --git a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
index f0b8299a66e3..c9f668f2c424 100644
--- a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
+++ b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
@@ -24,7 +24,7 @@ false:
 }
 
 ; Check that we manage to form a zextload is an operation with only one
-; argument to explicitly extend is in the the way.
+; argument to explicitly extend is in the way.
 ; OPTALL-LABEL: @promoteOneArg
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
@@ -49,7 +49,7 @@ false:
 }
 
 ; Check that we manage to form a sextload is an operation with only one
-; argument to explicitly extend is in the the way.
+; argument to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteOneArgSExt
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
@@ -74,7 +74,7 @@ false:
 }
 
 ; Check that we manage to form a zextload is an operation with two
-; arguments to explicitly extend is in the the way.
+; arguments to explicitly extend is in the way.
 ; Extending %add will create two extensions:
 ; 1. One for %b.
 ; 2. One for %t.
@@ -113,7 +113,7 @@ false:
 }
 
 ; Check that we manage to form a sextload is an operation with two
-; arguments to explicitly extend is in the the way.
+; arguments to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteTwoArgSExt
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
index 58df094d1922..3ff1e61d0298 100644
--- a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -1,270 +1,484 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
+; RUN:   < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
+define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
 ; CHECK-LABEL: test_vmull_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
   ret <4 x i32> %vmull15.i.i
 }
 
-define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
+define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 {
+; CHECK-LABEL: test_vmull_high_n_s16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
+  ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
 ; CHECK-LABEL: test_vmull_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  ret <2 x i64> %vmull9.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 {
+; CHECK-LABEL: test_vmull_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
+; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 511, i32 511>)
   ret <2 x i64> %vmull9.i.i
 }
 
-define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
+define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 {
 ; CHECK-LABEL: test_vmull_high_n_u16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
   ret <4 x i32> %vmull15.i.i
 }
 
-define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
+define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 {
+; CHECK-LABEL: test_vmull_high_n_u16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
+; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 4352, i16 4352, i16 4352, i16 4352>)
+  ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 {
 ; CHECK-LABEL: test_vmull_high_n_u32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
   ret <2 x i64> %vmull9.i.i
 }
 
-define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
+define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 {
+; CHECK-LABEL: test_vmull_high_n_u32_imm:
+; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
+; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 4294966784, i32 4294966784>)
+  ret <2 x i64> %vmull9.i.i
+}
+
+define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
 ; CHECK-LABEL: test_vqdmull_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  ret <4 x i32> %vqdmull15.i.i
+}
+
+define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 {
+; CHECK-LABEL: test_vqdmull_high_n_s16_imm:
+; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
+; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 61183, i16 61183, i16 61183, i16 61183>)
   ret <4 x i32> %vqdmull15.i.i
 }
 
-define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
+define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
 ; CHECK-LABEL: test_vqdmull_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  ret <2 x i64> %vqdmull9.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 {
+; CHECK-LABEL: test_vqdmull_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
   ret <2 x i64> %vqdmull9.i.i
 }
 
-define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vmlal_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vmlal_high_n_s16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
   ret <4 x i32> %add.i.i
 }
 
-define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vmlal_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
   ret <2 x i64> %add.i.i
 }
 
-define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vmlal_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vmlal_high_n_u16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vmlal_high_n_u16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
   ret <4 x i32> %add.i.i
 }
 
-define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vmlal_high_n_u32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
   ret <2 x i64> %add.i.i
 }
 
-define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vmlal_high_n_u32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vqdmlal_high_n_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
+  %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
+  ret <4 x i32> %vqdmlal17.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vqdmlal_high_n_s16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
+  %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
   ret <4 x i32> %vqdmlal17.i.i
 }
 
-define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vqdmlal_high_n_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
+  %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
+  ret <2 x i64> %vqdmlal11.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vqdmlal_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
   ret <2 x i64> %vqdmlal11.i.i
 }
 
-define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vmlsl_high_n_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vmlsl_high_n_s16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
   ret <4 x i32> %sub.i.i
 }
 
-define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vmlsl_high_n_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
   ret <2 x i64> %sub.i.i
 }
 
-define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vmlsl_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vmlsl_high_n_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vmlsl_high_n_u16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
   ret <4 x i32> %sub.i.i
 }
 
-define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vmlsl_high_n_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
   ret <2 x i64> %sub.i.i
 }
 
-define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vmlsl_high_n_u32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
 ; CHECK-LABEL: test_vqdmlsl_high_n_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
+  %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
+  ret <4 x i32> %vqdmlsl17.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
+  %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
   ret <4 x i32> %vqdmlsl17.i.i
 }
 
-define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
 ; CHECK-LABEL: test_vqdmlsl_high_n_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
 entry:
   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
+  %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
+  ret <2 x i64> %vqdmlsl11.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm:
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+; CHECK-NEXT: ret
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
+  %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
   ret <2 x i64> %vqdmlsl11.i.i
 }
 
-define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
+define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmul_n_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
@@ -272,9 +486,10 @@ entry:
   ret <2 x float> %mul.i
 }
 
-define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
+define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
@@ -284,9 +499,10 @@ entry:
   ret <4 x float> %mul.i
 }
 
-define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
+define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
   %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
@@ -294,48 +510,67 @@ entry:
   ret <2 x double> %mul.i
 }
 
-define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfma_n_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
+  %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
   ret <2 x float> %0
 }
 
-define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmaq_n_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
+  %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
   ret <4 x float> %0
 }
 
-define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfms_n_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
   %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
+  %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
   ret <2 x float> %1
 }
 
-define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmsq_n_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK-NEXT: ret
 entry:
   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
   %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
+  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
   ret <4 x float> %1
 }
+
+attributes #0 = { nounwind }
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 4d76396471ad..72561aac6e87 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -99,3 +99,35 @@ entry:
   store <4 x i32> %p20, <4 x i32>* %p21, align 4
   ret void
 }
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard
+; CHECK: stp w0, w1, [x2]
+; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
+; CHECK: add w0, [[REG]], w1
+; CHECK: ret
+define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard_after
+; CHECK: ldr [[REG:w[0-9]+]], [x3, #4]
+; CHECK: add w0, [[REG]], w2
+; CHECK: stp w1, w2, [x3]
+; CHECK: ret
+define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
index b707527f3c0c..109f4115d801 100644
--- a/test/CodeGen/AArch64/arm64-strict-align.ll
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT
 
 define i32 @f0(i32* nocapture %p) nounwind {
 ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll
index f304ba4ca286..2ac9e9043339 100644
--- a/test/CodeGen/AArch64/br-to-eh-lpad.ll
+++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll
@@ -7,12 +7,12 @@
 ; that case, the machine verifier, which relies on analyzing branches for this
 ; kind of verification, is unable to check anything, so accepts the CFG.
 
-define void @test_branch_to_landingpad() {
+define void @test_branch_to_landingpad() personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) {
 entry:
   br i1 undef, label %if.end50.thread, label %if.then6
 
 lpad:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @"OBJC_EHTYPE_$_NSString"
           catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @OBJC_EHTYPE_id
           catch i8* null
@@ -46,7 +46,7 @@ invoke.cont43:
   unreachable
 
 lpad40:
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+  %1 = landingpad { i8*, i32 }
           catch i8* null
   br label %finally.catchall
diff --git a/test/CodeGen/AArch64/ifcvt-select.ll b/test/CodeGen/AArch64/ifcvt-select.ll
new file mode 100644
index 000000000000..4e024d963f20
--- /dev/null
+++ b/test/CodeGen/AArch64/ifcvt-select.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s
+; Do not generate redundant select in early if-converstion pass.
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+;CHECK-LABEL: foo:
+;CHECK: csinc
+;CHECK-NOT: csel
+  %sub = sub nsw i32 %b, %a
+  %cmp10 = icmp sgt i32 %a, 0
+  br i1 %cmp10, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  %j.012 = phi i32 [ %sub, %while.body.lr.ph ], [ %inc, %if.then ], [ %inc, %if.else ]
+  %i.011 = phi i32 [ %a, %while.body.lr.ph ], [ %inc2, %if.then ], [ %dec, %if.else ]
+  %cmp1 = icmp slt i32 %i.011, %j.012
+  br i1 %cmp1, label %while.end, label %while.cond
+
+while.cond:
+  %inc = add nsw i32 %j.012, 5
+  %cmp2 = icmp slt i32 %inc, %b
+  br i1 %cmp2, label %if.then, label %if.else
+
+if.then:
+  %inc2 = add nsw i32 %i.011, 1
+  br label %while.body
+
+if.else:
+  %dec = add nsw i32 %i.011, -1
+  br label %while.body
+
+while.end:
+  %j.0.lcssa = phi i32 [ %j.012, %while.body ], [ %sub, %entry ]
+  %i.0.lcssa = phi i32 [ %i.011, %while.body ], [ %a, %entry ]
+  %add = add nsw i32 %j.0.lcssa, %i.0.lcssa
+  ret i32 %add
+}
+
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index f761a87783ce..143558f7b2c7 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -21,13 +21,13 @@
 
 @_ZTIi = external constant i8*
 
-define i32 @_Z3barv() {
+define i32 @_Z3barv() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z3foov()
           to label %return unwind label %lpad
 
 lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* bitcast (i8** @_ZTIi to i8*)
   %1 = extractvalue { i8*, i32 } %0, 1
   %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
diff --git a/test/CodeGen/AArch64/simple-macho.ll b/test/CodeGen/AArch64/simple-macho.ll
new file mode 100644
index 000000000000..e9dd98e230db
--- /dev/null
+++ b/test/CodeGen/AArch64/simple-macho.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=arm64-macho -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-macho -filetype=obj -o %t %s
+; RUN: llvm-objdump -triple=arm64-macho -d %t | FileCheck --check-prefix=CHECK-OBJ %s
+
+define void @foo() {
+; CHECK-LABEL: _foo:
+; CHECK: ret
+
+; CHECK-OBJ: 0: c0 03 5f d6 ret
+
+  ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
index c7bcfd2ddab2..c7bcfd2ddab2 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
diff --git a/test/CodeGen/R600/README b/test/CodeGen/AMDGPU/README
index 96998bba28f2..96998bba28f2 100644
--- a/test/CodeGen/R600/README
+++ b/test/CodeGen/AMDGPU/README
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/AMDGPU/add-debug.ll
index 529905dd36a2..529905dd36a2 100644
--- a/test/CodeGen/R600/add-debug.ll
+++ b/test/CodeGen/AMDGPU/add-debug.ll
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/AMDGPU/add.ll
index 655e75dbc1a4..655e75dbc1a4 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/AMDGPU/add.ll
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/AMDGPU/add_i64.ll
index 8346add7df97..8346add7df97 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/AMDGPU/add_i64.ll
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/AMDGPU/address-space.ll
index 4be8c5847529..4be8c5847529 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/AMDGPU/address-space.ll
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/AMDGPU/and.ll
index 5672d470bd7e..5672d470bd7e 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/AMDGPU/and.ll
diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/AMDGPU/anyext.ll
index 48d8f3122495..48d8f3122495 100644
--- a/test/CodeGen/R600/anyext.ll
+++ b/test/CodeGen/AMDGPU/anyext.ll
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index 8c2a0795860d..8c2a0795860d 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
index eae095eb8449..eae095eb8449 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll
index ef2560ef1849..ef2560ef1849 100644
--- a/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/AMDGPU/atomic_load_add.ll
index 20c685447eef..20c685447eef 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/AMDGPU/atomic_load_add.ll
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/AMDGPU/atomic_load_sub.ll
index 4c6f45525b9e..4c6f45525b9e 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/AMDGPU/atomic_load_sub.ll
diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll
index abdc4afef472..abdc4afef472 100644
--- a/test/CodeGen/R600/basic-branch.ll
+++ b/test/CodeGen/AMDGPU/basic-branch.ll
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/AMDGPU/basic-loop.ll
index f0263caf5d6b..f0263caf5d6b 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/AMDGPU/basic-loop.ll
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/AMDGPU/bfe_uint.ll
index 32e3fc26106f..32e3fc26106f 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/AMDGPU/bfe_uint.ll
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/AMDGPU/bfi_int.ll
index 03349349735d..03349349735d 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/AMDGPU/bfi_int.ll
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/AMDGPU/big_alu.ll
index 2671c5d102b3..2671c5d102b3 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/AMDGPU/big_alu.ll
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/AMDGPU/bitcast.ll
index fd56d956bf31..fd56d956bf31 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/AMDGPU/bitcast.ll
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/AMDGPU/bswap.ll
index 4cf8e4bfed50..4cf8e4bfed50 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/AMDGPU/bswap.ll
diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/AMDGPU/build_vector.ll
index 65eacf5adc41..65eacf5adc41 100644
--- a/test/CodeGen/R600/build_vector.ll
+++ b/test/CodeGen/AMDGPU/build_vector.ll
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/AMDGPU/call.ll
index e769fd11c282..e769fd11c282 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/AMDGPU/call.ll
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/AMDGPU/call_fs.ll
index 87bebbc49d52..87bebbc49d52 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/AMDGPU/call_fs.ll
diff --git a/test/CodeGen/R600/cayman-loop-bug.ll b/test/CodeGen/AMDGPU/cayman-loop-bug.ll
index c7b8c4037316..c7b8c4037316 100644
--- a/test/CodeGen/R600/cayman-loop-bug.ll
+++ b/test/CodeGen/AMDGPU/cayman-loop-bug.ll
diff --git a/test/CodeGen/R600/cf-stack-bug.ll b/test/CodeGen/AMDGPU/cf-stack-bug.ll
index 02c87d76bb20..75b87e486226 100644
--- a/test/CodeGen/R600/cf-stack-bug.ll
+++ b/test/CodeGen/AMDGPU/cf-stack-bug.ll
@@ -1,12 +1,29 @@
-; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG64 %s < %t
+
+; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG64 %s < %t
+
+; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG64 %s < %t
+
+; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG64 %s < %t
+
+; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG64 %s < %t
+
+; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=BUG32 %s < %t
+
+; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=NOBUG %s < %t
+
+; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=NOBUG %s < %t
+
+; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC
+; RUN: FileCheck --check-prefix=NOBUG %s < %t
 
 ; REQUIRES: asserts
diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/AMDGPU/cf_end.ll
index c74ee22868d5..c74ee22868d5 100644
--- a/test/CodeGen/R600/cf_end.ll
+++ b/test/CodeGen/AMDGPU/cf_end.ll
diff --git a/test/CodeGen/R600/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 3d36bd19937e..77f7bd01b7f0 100644
--- a/test/CodeGen/R600/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
 
 declare i32 @llvm.r600.read.tidig.x() #0
 
diff --git a/test/CodeGen/R600/coalescer_remat.ll b/test/CodeGen/AMDGPU/coalescer_remat.ll
index 96730bcf2e8f..96730bcf2e8f 100644
--- a/test/CodeGen/R600/coalescer_remat.ll
+++ b/test/CodeGen/AMDGPU/coalescer_remat.ll
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
index 585172092676..585172092676 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/AMDGPU/combine_vloads.ll
index 01572afa6205..01572afa6205 100644
--- a/test/CodeGen/R600/combine_vloads.ll
+++ b/test/CodeGen/AMDGPU/combine_vloads.ll
diff --git a/test/CodeGen/R600/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll
index 31766047a358..31766047a358 100644
--- a/test/CodeGen/R600/commute-compares.ll
+++ b/test/CodeGen/AMDGPU/commute-compares.ll
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/AMDGPU/commute_modifiers.ll
index 7fc36eabb780..7fc36eabb780 100644
--- a/test/CodeGen/R600/commute_modifiers.ll
+++ b/test/CodeGen/AMDGPU/commute_modifiers.ll
diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/AMDGPU/complex-folding.ll
index a5399a71324c..a5399a71324c 100644
--- a/test/CodeGen/R600/complex-folding.ll
+++ b/test/CodeGen/AMDGPU/complex-folding.ll
diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/AMDGPU/concat_vectors.ll
index a09ed1f73857..a09ed1f73857 100644
--- a/test/CodeGen/R600/concat_vectors.ll
+++ b/test/CodeGen/AMDGPU/concat_vectors.ll
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 8b397566066a..8b397566066a 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/AMDGPU/copy-illegal-type.ll
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/AMDGPU/copy-to-reg.ll
index fc875f6ef7a3..fc875f6ef7a3 100644
--- a/test/CodeGen/R600/copy-to-reg.ll
+++ b/test/CodeGen/AMDGPU/copy-to-reg.ll
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index bd26c302fe5a..bd26c302fe5a 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/AMDGPU/ctpop.ll
index 0a031c5e24d1..0a031c5e24d1 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/AMDGPU/ctpop.ll
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll
index e1a0ee3ea217..e1a0ee3ea217 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/AMDGPU/ctpop64.ll
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 56fcb51fe14e..56fcb51fe14e 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/AMDGPU/cttz_zero_undef.ll
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 3399d9da29e3..3399d9da29e3 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
diff --git a/test/CodeGen/R600/cvt_flr_i32_f32.ll b/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
index 2dd3a9f2a776..2dd3a9f2a776 100644
--- a/test/CodeGen/R600/cvt_flr_i32_f32.ll
+++ b/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
diff --git a/test/CodeGen/R600/cvt_rpi_i32_f32.ll b/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
index 864ac40260b3..864ac40260b3 100644
--- a/test/CodeGen/R600/cvt_rpi_i32_f32.ll
+++ b/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index fb43ff4fbddd..fb43ff4fbddd 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
diff --git a/test/CodeGen/R600/debug.ll b/test/CodeGen/AMDGPU/debug.ll
index a2e0e878b740..a2e0e878b740 100644
--- a/test/CodeGen/R600/debug.ll
+++ b/test/CodeGen/AMDGPU/debug.ll
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/AMDGPU/default-fp-mode.ll
index da8e91454b98..da8e91454b98 100644
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/AMDGPU/default-fp-mode.ll
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll
index cdd2c0cd4f43..cdd2c0cd4f43 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/AMDGPU/dot4-folding.ll
index 4df7b63bf98e..4df7b63bf98e 100644
--- a/test/CodeGen/R600/dot4-folding.ll
+++ b/test/CodeGen/AMDGPU/dot4-folding.ll
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
index e7e13d6178c4..e7e13d6178c4 100644
--- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll
index 5929898f8bd8..5929898f8bd8 100644
--- a/test/CodeGen/R600/ds_read2.ll
+++ b/test/CodeGen/AMDGPU/ds_read2.ll
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
index 9ea9a5a2617b..9ea9a5a2617b 100644
--- a/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll
index 54b3b45636d6..54b3b45636d6 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_read2st64.ll
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll
index b553d3459e40..b553d3459e40 100644
--- a/test/CodeGen/R600/ds_write2.ll
+++ b/test/CodeGen/AMDGPU/ds_write2.ll
diff --git 
a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll index 1d9d881c5c7e..1d9d881c5c7e 100644 --- a/test/CodeGen/R600/ds_write2st64.ll +++ b/test/CodeGen/AMDGPU/ds_write2st64.ll diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/AMDGPU/elf.ll index d0fd06a34379..d0fd06a34379 100644 --- a/test/CodeGen/R600/elf.ll +++ b/test/CodeGen/AMDGPU/elf.ll diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/AMDGPU/elf.r600.ll index 51cd08500932..51cd08500932 100644 --- a/test/CodeGen/R600/elf.r600.ll +++ b/test/CodeGen/AMDGPU/elf.r600.ll diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/AMDGPU/empty-function.ll index a060900811ea..a060900811ea 100644 --- a/test/CodeGen/R600/empty-function.ll +++ b/test/CodeGen/AMDGPU/empty-function.ll diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/AMDGPU/endcf-loop-header.ll index 267a323c5063..267a323c5063 100644 --- a/test/CodeGen/R600/endcf-loop-header.ll +++ b/test/CodeGen/AMDGPU/endcf-loop-header.ll diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/AMDGPU/extload-private.ll index 294c3a9c6782..294c3a9c6782 100644 --- a/test/CodeGen/R600/extload-private.ll +++ b/test/CodeGen/AMDGPU/extload-private.ll diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/AMDGPU/extload.ll index 662eb7a9716b..662eb7a9716b 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/AMDGPU/extload.ll diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/AMDGPU/extract_vector_elt_i16.ll index c7572efc6f5b..c7572efc6f5b 100644 --- a/test/CodeGen/R600/extract_vector_elt_i16.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt_i16.ll diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/AMDGPU/fabs.f64.ll index 3c6136c1a7bd..3c6136c1a7bd 100644 --- a/test/CodeGen/R600/fabs.f64.ll +++ b/test/CodeGen/AMDGPU/fabs.f64.ll diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/AMDGPU/fabs.ll index 419a73d02669..419a73d02669 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/AMDGPU/fabs.ll diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/AMDGPU/fadd.ll index 5fac328c5981..5fac328c5981 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/AMDGPU/fadd.ll diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll index 485c55870c47..485c55870c47 100644 --- a/test/CodeGen/R600/fadd64.ll +++ b/test/CodeGen/AMDGPU/fadd64.ll diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/AMDGPU/fceil.ll index f23e8919d733..f23e8919d733 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/AMDGPU/fceil.ll diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/AMDGPU/fceil64.ll index e8c34f0141e4..e8c34f0141e4 100644 --- a/test/CodeGen/R600/fceil64.ll +++ b/test/CodeGen/AMDGPU/fceil64.ll diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/AMDGPU/fcmp-cnd.ll index 530274f920f0..530274f920f0 100644 --- a/test/CodeGen/R600/fcmp-cnd.ll +++ b/test/CodeGen/AMDGPU/fcmp-cnd.ll diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll index c402805feb39..c402805feb39 100644 --- a/test/CodeGen/R600/fcmp-cnde-int-args.ll +++ b/test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/AMDGPU/fcmp.ll index 5207ab57bade..5207ab57bade 100644 --- a/test/CodeGen/R600/fcmp.ll +++ b/test/CodeGen/AMDGPU/fcmp.ll diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/AMDGPU/fcmp64.ll index 053ab0ed7aaf..053ab0ed7aaf 100644 --- a/test/CodeGen/R600/fcmp64.ll +++ b/test/CodeGen/AMDGPU/fcmp64.ll diff --git 
a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/AMDGPU/fconst64.ll
index 89af37545c99..89af37545c99 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/AMDGPU/fconst64.ll
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/AMDGPU/fcopysign.f32.ll
index b719d5a39785..b719d5a39785 100644
--- a/test/CodeGen/R600/fcopysign.f32.ll
+++ b/test/CodeGen/AMDGPU/fcopysign.f32.ll
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/AMDGPU/fcopysign.f64.ll
index 3d8c55993089..3d8c55993089 100644
--- a/test/CodeGen/R600/fcopysign.f64.ll
+++ b/test/CodeGen/AMDGPU/fcopysign.f64.ll
diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/AMDGPU/fdiv.f64.ll
index 7c022e38c808..7c022e38c808 100644
--- a/test/CodeGen/R600/fdiv.f64.ll
+++ b/test/CodeGen/AMDGPU/fdiv.f64.ll
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll
index 7cbf87336399..7cbf87336399 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/AMDGPU/fdiv.ll
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index e7160ef5d726..e7160ef5d726 100644
--- a/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/test/CodeGen/AMDGPU/fetch-limits.r600.ll
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index acaea2aa7943..acaea2aa7943 100644
--- a/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
diff --git a/test/CodeGen/R600/ffloor.f64.ll b/test/CodeGen/AMDGPU/ffloor.f64.ll
index 45f8382c3929..45f8382c3929 100644
--- a/test/CodeGen/R600/ffloor.f64.ll
+++ b/test/CodeGen/AMDGPU/ffloor.f64.ll
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/AMDGPU/ffloor.ll
index 61c46ac2bc03..61c46ac2bc03 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/AMDGPU/ffloor.ll
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll
index 425d67d5b07c..8ceca078f2d6 100644
--- a/test/CodeGen/R600/flat-address-space.ll
+++ b/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: {{^}}branch_use_flat_i32:
-; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
+; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
 ; CHECK: s_endpgm
 define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
 entry:
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/AMDGPU/floor.ll
index c6bfb8567a0f..c6bfb8567a0f 100644
--- a/test/CodeGen/R600/floor.ll
+++ b/test/CodeGen/AMDGPU/floor.ll
diff --git a/test/CodeGen/R600/fma-combine.ll b/test/CodeGen/AMDGPU/fma-combine.ll
index bd574b877117..bd574b877117 100644
--- a/test/CodeGen/R600/fma-combine.ll
+++ b/test/CodeGen/AMDGPU/fma-combine.ll
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/AMDGPU/fma.f64.ll
index 0a55ef778557..0a55ef778557 100644
--- a/test/CodeGen/R600/fma.f64.ll
+++ b/test/CodeGen/AMDGPU/fma.f64.ll
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/AMDGPU/fma.ll
index d6024aa0b4c5..d6024aa0b4c5 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/AMDGPU/fma.ll
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/AMDGPU/fmad.ll
index 935e35123f45..935e35123f45 100644
--- a/test/CodeGen/R600/fmad.ll
+++ b/test/CodeGen/AMDGPU/fmad.ll
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/AMDGPU/fmax.ll
index d7127f485c74..d7127f485c74 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/AMDGPU/fmax.ll
diff --git a/test/CodeGen/R600/fmax3.f64.ll
b/test/CodeGen/AMDGPU/fmax3.f64.ll index f78c71b28264..f78c71b28264 100644 --- a/test/CodeGen/R600/fmax3.f64.ll +++ b/test/CodeGen/AMDGPU/fmax3.f64.ll diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/AMDGPU/fmax3.ll index c3028a6217d5..c3028a6217d5 100644 --- a/test/CodeGen/R600/fmax3.ll +++ b/test/CodeGen/AMDGPU/fmax3.ll diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/AMDGPU/fmax_legacy.f64.ll index 828243888ac7..828243888ac7 100644 --- a/test/CodeGen/R600/fmax_legacy.f64.ll +++ b/test/CodeGen/AMDGPU/fmax_legacy.f64.ll diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/AMDGPU/fmax_legacy.ll index 413957d2982a..413957d2982a 100644 --- a/test/CodeGen/R600/fmax_legacy.ll +++ b/test/CodeGen/AMDGPU/fmax_legacy.ll diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/AMDGPU/fmaxnum.f64.ll index de563cec3412..de563cec3412 100644 --- a/test/CodeGen/R600/fmaxnum.f64.ll +++ b/test/CodeGen/AMDGPU/fmaxnum.f64.ll diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/AMDGPU/fmaxnum.ll index 3029bd02e4db..3029bd02e4db 100644 --- a/test/CodeGen/R600/fmaxnum.ll +++ b/test/CodeGen/AMDGPU/fmaxnum.ll diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/AMDGPU/fmin.ll index defa8c09638a..defa8c09638a 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/AMDGPU/fmin.ll diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/AMDGPU/fmin3.ll index 0a76699b43e1..0a76699b43e1 100644 --- a/test/CodeGen/R600/fmin3.ll +++ b/test/CodeGen/AMDGPU/fmin3.ll diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/AMDGPU/fmin_legacy.f64.ll index e19a48f3f7e2..e19a48f3f7e2 100644 --- a/test/CodeGen/R600/fmin_legacy.f64.ll +++ b/test/CodeGen/AMDGPU/fmin_legacy.f64.ll diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/AMDGPU/fmin_legacy.ll index 6a625c239d76..6a625c239d76 100644 --- a/test/CodeGen/R600/fmin_legacy.ll +++ b/test/CodeGen/AMDGPU/fmin_legacy.ll diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/AMDGPU/fminnum.f64.ll index 0f929d6a81f0..0f929d6a81f0 100644 --- a/test/CodeGen/R600/fminnum.f64.ll +++ b/test/CodeGen/AMDGPU/fminnum.f64.ll diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/AMDGPU/fminnum.ll index 4d7b52540d85..4d7b52540d85 100644 --- a/test/CodeGen/R600/fminnum.ll +++ b/test/CodeGen/AMDGPU/fminnum.ll diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/AMDGPU/fmul.ll index addc409c9eb1..addc409c9eb1 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/AMDGPU/fmul.ll diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/AMDGPU/fmul64.ll index 3c222eaba89d..3c222eaba89d 100644 --- a/test/CodeGen/R600/fmul64.ll +++ b/test/CodeGen/AMDGPU/fmul64.ll diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/AMDGPU/fmuladd.ll index ae84d841021d..ae84d841021d 100644 --- a/test/CodeGen/R600/fmuladd.ll +++ b/test/CodeGen/AMDGPU/fmuladd.ll diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/AMDGPU/fnearbyint.ll index 4fa9adaabdae..4fa9adaabdae 100644 --- a/test/CodeGen/R600/fnearbyint.ll +++ b/test/CodeGen/AMDGPU/fnearbyint.ll diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll index 8830e8273661..8830e8273661 100644 --- a/test/CodeGen/R600/fneg-fabs.f64.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll index 3b4930d9897d..3b4930d9897d 100644 --- a/test/CodeGen/R600/fneg-fabs.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.ll diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/AMDGPU/fneg.f64.ll index 
aa6df209035b..aa6df209035b 100644 --- a/test/CodeGen/R600/fneg.f64.ll +++ b/test/CodeGen/AMDGPU/fneg.f64.ll diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/AMDGPU/fneg.ll index a0fd539863c6..a0fd539863c6 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/AMDGPU/fneg.ll diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/AMDGPU/fp-classify.ll index 4fac5176fac9..4fac5176fac9 100644 --- a/test/CodeGen/R600/fp-classify.ll +++ b/test/CodeGen/AMDGPU/fp-classify.ll diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/AMDGPU/fp16_to_fp.ll index 5a79ca82bc29..5a79ca82bc29 100644 --- a/test/CodeGen/R600/fp16_to_fp.ll +++ b/test/CodeGen/AMDGPU/fp16_to_fp.ll diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/AMDGPU/fp32_to_fp16.ll index 67925ebd82b6..67925ebd82b6 100644 --- a/test/CodeGen/R600/fp32_to_fp16.ll +++ b/test/CodeGen/AMDGPU/fp32_to_fp16.ll diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll index 12df6606e8ff..12df6606e8ff 100644 --- a/test/CodeGen/R600/fp_to_sint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/AMDGPU/fp_to_sint.ll index 301a94b4904c..301a94b4904c 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.ll diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll index 41bc2a780014..41bc2a780014 100644 --- a/test/CodeGen/R600/fp_to_uint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/AMDGPU/fp_to_uint.ll index b7b6ccc238b3..b7b6ccc238b3 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.ll diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/AMDGPU/fpext.ll index 734a43be2296..734a43be2296 100644 --- a/test/CodeGen/R600/fpext.ll +++ b/test/CodeGen/AMDGPU/fpext.ll diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/AMDGPU/fptrunc.ll index 385e10e7baae..385e10e7baae 100644 --- a/test/CodeGen/R600/fptrunc.ll +++ b/test/CodeGen/AMDGPU/fptrunc.ll diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/AMDGPU/frem.ll index f245ef08cb9d..f245ef08cb9d 100644 --- a/test/CodeGen/R600/frem.ll +++ b/test/CodeGen/AMDGPU/frem.ll diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/AMDGPU/fsqrt.ll index 04101346cdf9..04101346cdf9 100644 --- a/test/CodeGen/R600/fsqrt.ll +++ b/test/CodeGen/AMDGPU/fsqrt.ll diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/AMDGPU/fsub.ll index dfe41cb5b111..dfe41cb5b111 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/AMDGPU/fsub.ll diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/AMDGPU/fsub64.ll index f34a48e30a86..f34a48e30a86 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/AMDGPU/fsub64.ll diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/AMDGPU/ftrunc.f64.ll index 6618d8b5e57e..6618d8b5e57e 100644 --- a/test/CodeGen/R600/ftrunc.f64.ll +++ b/test/CodeGen/AMDGPU/ftrunc.f64.ll diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/AMDGPU/ftrunc.ll index edc08609a8aa..edc08609a8aa 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/AMDGPU/ftrunc.ll diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/AMDGPU/gep-address-space.ll index 471b0f6b13e7..471b0f6b13e7 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/AMDGPU/gep-address-space.ll diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/AMDGPU/global-directive.ll index be775cf9292f..be775cf9292f 100644 --- 
a/test/CodeGen/R600/global-directive.ll +++ b/test/CodeGen/AMDGPU/global-directive.ll diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/AMDGPU/global-extload-i1.ll index bd9557d730fb..bd9557d730fb 100644 --- a/test/CodeGen/R600/global-extload-i1.ll +++ b/test/CodeGen/AMDGPU/global-extload-i1.ll diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/AMDGPU/global-extload-i16.ll index 103a40dee270..103a40dee270 100644 --- a/test/CodeGen/R600/global-extload-i16.ll +++ b/test/CodeGen/AMDGPU/global-extload-i16.ll diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/AMDGPU/global-extload-i32.ll index 79b83452939e..79b83452939e 100644 --- a/test/CodeGen/R600/global-extload-i32.ll +++ b/test/CodeGen/AMDGPU/global-extload-i32.ll diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/AMDGPU/global-extload-i8.ll index b31d5361d5a2..b31d5361d5a2 100644 --- a/test/CodeGen/R600/global-extload-i8.ll +++ b/test/CodeGen/AMDGPU/global-extload-i8.ll diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/AMDGPU/global-zero-initializer.ll index 45aa8bf4e1d7..45aa8bf4e1d7 100644 --- a/test/CodeGen/R600/global-zero-initializer.ll +++ b/test/CodeGen/AMDGPU/global-zero-initializer.ll diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll index 847950f6376e..847950f6376e 100644 --- a/test/CodeGen/R600/global_atomics.ll +++ b/test/CodeGen/AMDGPU/global_atomics.ll diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll index 014b0a5482ab..014b0a5482ab 100644 --- a/test/CodeGen/R600/gv-const-addrspace-fail.ll +++ b/test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/AMDGPU/gv-const-addrspace.ll index 3c1fc6c98f74..3c1fc6c98f74 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/AMDGPU/gv-const-addrspace.ll diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/AMDGPU/half.ll index bf8f11860b50..bf8f11860b50 100644 --- a/test/CodeGen/R600/half.ll +++ b/test/CodeGen/AMDGPU/half.ll diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll index f9113399afe8..f9113399afe8 100644 --- a/test/CodeGen/R600/hsa.ll +++ b/test/CodeGen/AMDGPU/hsa.ll diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll index b11a21137642..b11a21137642 100644 --- a/test/CodeGen/R600/i1-copy-implicit-def.ll +++ b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/AMDGPU/i1-copy-phi.ll index 105cd06b330a..105cd06b330a 100644 --- a/test/CodeGen/R600/i1-copy-phi.ll +++ b/test/CodeGen/AMDGPU/i1-copy-phi.ll diff --git a/test/CodeGen/R600/i8-to-double-to-float.ll b/test/CodeGen/AMDGPU/i8-to-double-to-float.ll index c218e1918bb0..c218e1918bb0 100644 --- a/test/CodeGen/R600/i8-to-double-to-float.ll +++ b/test/CodeGen/AMDGPU/i8-to-double-to-float.ll diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll index 60e59a5a5286..60e59a5a5286 100644 --- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll +++ b/test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/AMDGPU/icmp64.ll index 0eaa33ebafed..0eaa33ebafed 100644 --- a/test/CodeGen/R600/icmp64.ll +++ b/test/CodeGen/AMDGPU/icmp64.ll diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/AMDGPU/imm.ll index 12eed550eb1f..12eed550eb1f 100644 --- 
a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/AMDGPU/imm.ll diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index f551606d63a7..f551606d63a7 100644 --- a/test/CodeGen/R600/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/AMDGPU/indirect-private-64.ll index d63e1b6c5212..d63e1b6c5212 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/AMDGPU/indirect-private-64.ll diff --git a/test/CodeGen/R600/infinite-loop-evergreen.ll b/test/CodeGen/AMDGPU/infinite-loop-evergreen.ll index f6e39b3d8306..f6e39b3d8306 100644 --- a/test/CodeGen/R600/infinite-loop-evergreen.ll +++ b/test/CodeGen/AMDGPU/infinite-loop-evergreen.ll diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll index 7233aa57fd78..7233aa57fd78 100644 --- a/test/CodeGen/R600/infinite-loop.ll +++ b/test/CodeGen/AMDGPU/infinite-loop.ll diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index efc2292de3a5..efc2292de3a5 100644 --- a/test/CodeGen/R600/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/AMDGPU/inline-calls.ll index 33a4c832e75e..33a4c832e75e 100644 --- a/test/CodeGen/R600/inline-calls.ll +++ b/test/CodeGen/AMDGPU/inline-calls.ll diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/AMDGPU/input-mods.ll index 1c4d285cbcb1..1c4d285cbcb1 100644 --- a/test/CodeGen/R600/input-mods.ll +++ b/test/CodeGen/AMDGPU/input-mods.ll diff --git a/test/CodeGen/R600/insert_subreg.ll b/test/CodeGen/AMDGPU/insert_subreg.ll index 4a5e8869c2df..4a5e8869c2df 100644 --- a/test/CodeGen/R600/insert_subreg.ll +++ b/test/CodeGen/AMDGPU/insert_subreg.ll diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index 6de3d408c486..6de3d408c486 100644 --- a/test/CodeGen/R600/insert_vector_elt.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/AMDGPU/jump-address.ll index f55912e37401..f55912e37401 100644 --- a/test/CodeGen/R600/jump-address.ll +++ b/test/CodeGen/AMDGPU/jump-address.ll diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/AMDGPU/kcache-fold.ll index 7e2291cfdc35..7e2291cfdc35 100644 --- a/test/CodeGen/R600/kcache-fold.ll +++ b/test/CodeGen/AMDGPU/kcache-fold.ll diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll index 1dd7c2cb7995..1dd7c2cb7995 100644 --- a/test/CodeGen/R600/kernel-args.ll +++ b/test/CodeGen/AMDGPU/kernel-args.ll diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/AMDGPU/large-alloca.ll index 671833d1a33a..671833d1a33a 100644 --- a/test/CodeGen/R600/large-alloca.ll +++ b/test/CodeGen/AMDGPU/large-alloca.ll diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/AMDGPU/large-constant-initializer.ll index 9975b1b7f5cc..9975b1b7f5cc 100644 --- a/test/CodeGen/R600/large-constant-initializer.ll +++ b/test/CodeGen/AMDGPU/large-constant-initializer.ll diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/AMDGPU/lds-initializer.ll index bf8df63be9fd..bf8df63be9fd 100644 --- a/test/CodeGen/R600/lds-initializer.ll +++ b/test/CodeGen/AMDGPU/lds-initializer.ll diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/AMDGPU/lds-oqap-crash.ll index 6ff6fc3d7afc..6ff6fc3d7afc 100644 --- a/test/CodeGen/R600/lds-oqap-crash.ll +++ 
b/test/CodeGen/AMDGPU/lds-oqap-crash.ll
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/AMDGPU/lds-output-queue.ll
index 44ffc36af149..44ffc36af149 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/AMDGPU/lds-output-queue.ll
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/AMDGPU/lds-size.ll
index 3e8328659fdb..3e8328659fdb 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/AMDGPU/lds-size.ll
diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/AMDGPU/lds-zero-initializer.ll
index fb51bc0e50c2..fb51bc0e50c2 100644
--- a/test/CodeGen/R600/lds-zero-initializer.ll
+++ b/test/CodeGen/AMDGPU/lds-zero-initializer.ll
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll
index 4244c48d240e..4244c48d240e 100644
--- a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
+++ b/test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll
diff --git a/test/CodeGen/AMDGPU/lit.local.cfg b/test/CodeGen/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/CodeGen/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/AMDGPU/literals.ll
index cff1c24f89d6..cff1c24f89d6 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/AMDGPU/literals.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
index 8bf094b8bc7b..8bf094b8bc7b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.global.ll
index db883972d646..db883972d646 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.global.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.local.ll
index 48fb2e0b1a8d..48fb2e0b1a8d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.local.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
index 1168713ca66e..1168713ca66e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.u32.ll
index 541119242a94..541119242a94 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.u32.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfi.ll
index 517a55abc098..517a55abc098 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfi.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfm.ll
index 50492289d744..50492289d744 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfm.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll
index 301de4b1c82d..301de4b1c82d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll
diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
index 11ec963ab314..11ec963ab314 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
diff --git
a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll index 805a88b59c72..805a88b59c72 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.class.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll index e95a51093cb7..e95a51093cb7 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.cube.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.cvt_f32_ubyte.ll index 8b32f696449e..8b32f696449e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.cvt_f32_ubyte.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll index 55ca9c7536e5..55ca9c7536e5 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll index bcb7f870f1f4..bcb7f870f1f4 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll index de830de039c7..de830de039c7 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll index 20c7af8ade5e..20c7af8ade5e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll index e098dd35d6da..e098dd35d6da 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.ll index 7501b4b75465..7501b4b75465 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll index 42102e30f071..42102e30f071 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.imax.ll index 46662f96c290..46662f96c290 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.imax.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.imin.ll index 34b454e23755..34b454e23755 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.imin.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll index fdc1172260b9..fdc1172260b9 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll index 057708e7b5cc..057708e7b5cc 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll index a59c0ce6d675..a59c0ce6d675 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll 
b/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll index 4cafd563685e..4cafd563685e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll index 83b56a5029d3..83b56a5029d3 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll index d2a655bf909c..d2a655bf909c 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll index edd6e9a72f1b..edd6e9a72f1b 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll index 67f1d22c7178..67f1d22c7178 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll index eeff2536b232..eeff2536b232 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll index 36b72f14db19..36b72f14db19 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.tex.ll index 10206609bb57..10206609bb57 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.tex.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll index 6b546a7e17c1..6b546a7e17c1 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index 74792e50017f..74792e50017f 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll index 77a073b0cb03..77a073b0cb03 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.umax.ll index a97d103016d3..a97d103016d3 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.umax.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.umin.ll index 2acd10e0c631..2acd10e0c631 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.umin.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll index 76624a078b3a..76624a078b3a 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.ll b/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll index 3d05da616e4e..3d05da616e4e 100644 --- a/test/CodeGen/R600/llvm.SI.fs.interp.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/AMDGPU/llvm.SI.gather4.ll index 275cb580bc9b..275cb580bc9b 
100644 --- a/test/CodeGen/R600/llvm.SI.gather4.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.gather4.ll diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/AMDGPU/llvm.SI.getlod.ll index 06ee98e91b31..06ee98e91b31 100644 --- a/test/CodeGen/R600/llvm.SI.getlod.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.getlod.ll diff --git a/test/CodeGen/R600/llvm.SI.image.ll b/test/CodeGen/AMDGPU/llvm.SI.image.ll index 0fac8d799562..0fac8d799562 100644 --- a/test/CodeGen/R600/llvm.SI.image.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.image.ll diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll index 4bc638a28063..4bc638a28063 100644 --- a/test/CodeGen/R600/llvm.SI.image.sample.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.image.sample.ll diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll index 9d8935414ed9..9d8935414ed9 100644 --- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/AMDGPU/llvm.SI.imageload.ll index b67716c3b665..b67716c3b665 100644 --- a/test/CodeGen/R600/llvm.SI.imageload.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.imageload.ll diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll index f6c258539d5b..f6c258539d5b 100644 --- a/test/CodeGen/R600/llvm.SI.load.dword.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll index ac95fd0b83a2..ac95fd0b83a2 100644 --- a/test/CodeGen/R600/llvm.SI.resinfo.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll index ce9558cbf81d..ce9558cbf81d 100644 --- a/test/CodeGen/R600/llvm.SI.sample-masked.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/AMDGPU/llvm.SI.sample.ll index 509c45f588b8..509c45f588b8 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.sample.ll diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/AMDGPU/llvm.SI.sampled.ll index f2badff2a99c..f2badff2a99c 100644 --- a/test/CodeGen/R600/llvm.SI.sampled.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.sampled.ll diff --git a/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll b/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll index 2198590f2dfe..2198590f2dfe 100644 --- a/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll index 09675d503355..09675d503355 100644 --- a/test/CodeGen/R600/llvm.SI.sendmsg.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll index 71f51548a5f8..71f51548a5f8 100644 --- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/AMDGPU/llvm.SI.tid.ll index f6e6d7050ba7..f6e6d7050ba7 100644 --- a/test/CodeGen/R600/llvm.SI.tid.ll +++ b/test/CodeGen/AMDGPU/llvm.SI.tid.ll diff --git a/test/CodeGen/R600/llvm.amdgpu.dp4.ll b/test/CodeGen/AMDGPU/llvm.amdgpu.dp4.ll index 036cd2ca82a6..036cd2ca82a6 100644 --- a/test/CodeGen/R600/llvm.amdgpu.dp4.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgpu.dp4.ll diff --git a/test/CodeGen/R600/llvm.amdgpu.kilp.ll 
b/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll index 42df6db1ccfd..42df6db1ccfd 100644 --- a/test/CodeGen/R600/llvm.amdgpu.kilp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll diff --git a/test/CodeGen/R600/llvm.amdgpu.lrp.ll b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll index 4e4c2ec7791a..4e4c2ec7791a 100644 --- a/test/CodeGen/R600/llvm.amdgpu.lrp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/AMDGPU/llvm.cos.ll index c65df8b3e8da..c65df8b3e8da 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/AMDGPU/llvm.cos.ll diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/AMDGPU/llvm.exp2.ll index 42698925aae4..42698925aae4 100644 --- a/test/CodeGen/R600/llvm.exp2.ll +++ b/test/CodeGen/AMDGPU/llvm.exp2.ll diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/AMDGPU/llvm.log2.ll index c75e7850b353..c75e7850b353 100644 --- a/test/CodeGen/R600/llvm.log2.ll +++ b/test/CodeGen/AMDGPU/llvm.log2.ll diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index e491732cf9c5..e491732cf9c5 100644 --- a/test/CodeGen/R600/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/AMDGPU/llvm.pow.ll index c4ae652619c2..c4ae652619c2 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/AMDGPU/llvm.pow.ll diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/AMDGPU/llvm.rint.f64.ll index c63fb1727940..c63fb1727940 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.rint.f64.ll diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/AMDGPU/llvm.rint.ll index 661db51ad032..661db51ad032 100644 --- a/test/CodeGen/R600/llvm.rint.ll +++ b/test/CodeGen/AMDGPU/llvm.rint.ll diff --git a/test/CodeGen/R600/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll index 3d0f57e33280..3d0f57e33280 100644 --- a/test/CodeGen/R600/llvm.round.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/AMDGPU/llvm.round.ll index f5f124d915a5..f5f124d915a5 100644 --- a/test/CodeGen/R600/llvm.round.ll +++ b/test/CodeGen/AMDGPU/llvm.round.ll diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/AMDGPU/llvm.sin.ll index 3bb245c2e249..3bb245c2e249 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/AMDGPU/llvm.sin.ll diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/AMDGPU/llvm.sqrt.ll index c6da047f5392..c6da047f5392 100644 --- a/test/CodeGen/R600/llvm.sqrt.ll +++ b/test/CodeGen/AMDGPU/llvm.sqrt.ll diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/AMDGPU/load-i1.ll index 0ca49fde3e7b..0ca49fde3e7b 100644 --- a/test/CodeGen/R600/load-i1.ll +++ b/test/CodeGen/AMDGPU/load-i1.ll diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/AMDGPU/load-input-fold.ll index 1daf0e6527b9..1daf0e6527b9 100644 --- a/test/CodeGen/R600/load-input-fold.ll +++ b/test/CodeGen/AMDGPU/load-input-fold.ll diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/AMDGPU/load.ll index 93b1b51a0d07..93b1b51a0d07 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/AMDGPU/load.ll diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/AMDGPU/load.vec.ll index 02f883cd8e9c..02f883cd8e9c 100644 --- a/test/CodeGen/R600/load.vec.ll +++ b/test/CodeGen/AMDGPU/load.vec.ll diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/AMDGPU/load64.ll index 74beabdc0076..74beabdc0076 100644 --- a/test/CodeGen/R600/load64.ll +++ b/test/CodeGen/AMDGPU/load64.ll diff --git 
a/test/CodeGen/R600/local-64.ll b/test/CodeGen/AMDGPU/local-64.ll
index 33f3159d13eb..33f3159d13eb 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/AMDGPU/local-64.ll
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/AMDGPU/local-atomics.ll
index 2aaf977ab903..2aaf977ab903 100644
--- a/test/CodeGen/R600/local-atomics.ll
+++ b/test/CodeGen/AMDGPU/local-atomics.ll
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/AMDGPU/local-atomics64.ll
index 0ffa5e751b7d..0ffa5e751b7d 100644
--- a/test/CodeGen/R600/local-atomics64.ll
+++ b/test/CodeGen/AMDGPU/local-atomics64.ll
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/AMDGPU/local-memory-two-objects.ll
index 06a8b1246e63..f501a7ac6274 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/AMDGPU/local-memory-two-objects.ll
@@ -14,7 +14,7 @@
 ; EG: {{^}}local_memory_two_objects:
-; We would like to check the the lds writes are using different
+; We would like to check the lds writes are using different
 ; addresses, but due to variations in the scheduler, we can't do
 ; this consistently on evergreen GPUs.
 ; EG: LDS_WRITE
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/AMDGPU/local-memory.ll
index 9494ed75bd0c..9494ed75bd0c 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/AMDGPU/local-memory.ll
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/AMDGPU/loop-address.ll
index f60d574497de..f60d574497de 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/AMDGPU/loop-address.ll
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/AMDGPU/loop-idiom.ll
index 5fd9806813cd..5fd9806813cd 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/AMDGPU/loop-idiom.ll
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/AMDGPU/lshl.ll
index 9ac988d38d1b..9ac988d38d1b 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/AMDGPU/lshl.ll
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/AMDGPU/lshr.ll
index 50e444ac26b3..50e444ac26b3 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/AMDGPU/lshr.ll
diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/AMDGPU/m0-spill.ll
index 1dddc85f775d..1dddc85f775d 100644
--- a/test/CodeGen/R600/m0-spill.ll
+++ b/test/CodeGen/AMDGPU/m0-spill.ll
diff --git a/test/CodeGen/R600/mad-combine.ll b/test/CodeGen/AMDGPU/mad-combine.ll
index bc071628ead0..bc071628ead0 100644
--- a/test/CodeGen/R600/mad-combine.ll
+++ b/test/CodeGen/AMDGPU/mad-combine.ll
diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/AMDGPU/mad-sub.ll
index aa4194ff6106..aa4194ff6106 100644
--- a/test/CodeGen/R600/mad-sub.ll
+++ b/test/CodeGen/AMDGPU/mad-sub.ll
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/AMDGPU/mad_int24.ll
index 86d75a63ca40..86d75a63ca40 100644
--- a/test/CodeGen/R600/mad_int24.ll
+++ b/test/CodeGen/AMDGPU/mad_int24.ll
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/AMDGPU/mad_uint24.ll
index 95fe34119596..95fe34119596 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/AMDGPU/mad_uint24.ll
diff --git a/test/CodeGen/R600/madak.ll b/test/CodeGen/AMDGPU/madak.ll
index 933bb016d2c9..933bb016d2c9 100644
--- a/test/CodeGen/R600/madak.ll
+++ b/test/CodeGen/AMDGPU/madak.ll
diff --git a/test/CodeGen/R600/madmk.ll b/test/CodeGen/AMDGPU/madmk.ll
index ba7bb221a99a..ba7bb221a99a 100644
--- a/test/CodeGen/R600/madmk.ll
+++ b/test/CodeGen/AMDGPU/madmk.ll
diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/AMDGPU/max-literals.ll
index c357524b140f..c357524b140f 100644
--- a/test/CodeGen/R600/max-literals.ll
+++ b/test/CodeGen/AMDGPU/max-literals.ll
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/AMDGPU/max.ll
index fef3e2f0a21c..fef3e2f0a21c 100644
--- a/test/CodeGen/R600/max.ll
+++ b/test/CodeGen/AMDGPU/max.ll
diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/AMDGPU/max3.ll
index cfb94b272e51..cfb94b272e51 100644
--- a/test/CodeGen/R600/max3.ll
+++ b/test/CodeGen/AMDGPU/max3.ll
diff --git a/test/CodeGen/R600/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll
index dbf9d4481ffb..34a2fc7ffa74 100644
--- a/test/CodeGen/R600/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -89,7 +89,11 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
-; GCN: buffer_store_dwordx2
+; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0
+; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}}
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
   %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
@@ -99,7 +103,11 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
-; GCN: buffer_store_dwordx4
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
+; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
   %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
@@ -530,6 +538,95 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
+; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
+; GCN: buffer_store_dword v[[HI]]
+define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
+  store i32 9, i32 addrspace(1)* %out, align 4
+  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+  store i32 12, i32 addrspace(1)* %idx1, align 4
+  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+  store i32 16, i32 addrspace(1)* %idx2, align 4
+  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+  store i32 -12, i32 addrspace(1)* %idx3, align 4
+  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+  store i32 11, i32 addrspace(1)* %idx4, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
+  store i32 13, i32 addrspace(1)* %out, align 4
+  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+  store i32 15, i32 addrspace(1)* %idx1, align 4
+  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+  store i32 62, i32 addrspace(1)* %idx2, align 4
+  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+  store i32 63, i32 addrspace(1)* %idx3, align 4
+  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+  store i32 11, i32 addrspace(1)* %idx4, align 4
+  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+  store i32 123, i32 addrspace(1)* %idx5, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_7_constants_i32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dword v
+define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
+  store i32 34, i32 addrspace(1)* %out, align 4
+  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+  store i32 999, i32 addrspace(1)* %idx1, align 4
+  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+  store i32 65, i32 addrspace(1)* %idx2, align 4
+  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+  store i32 33, i32 addrspace(1)* %idx3, align 4
+  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+  store i32 98, i32 addrspace(1)* %idx4, align 4
+  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+  store i32 91, i32 addrspace(1)* %idx5, align 4
+  %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+  store i32 212, i32 addrspace(1)* %idx6, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
+; XGCN: buffer_store_dwordx4
+; XGCN: buffer_store_dwordx4
+
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
+  store i32 34, i32 addrspace(1)* %out, align 4
+  %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+  store i32 999, i32 addrspace(1)* %idx1, align 4
+  %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+  store i32 65, i32 addrspace(1)* %idx2, align 4
+  %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+  store i32 33, i32 addrspace(1)* %idx3, align 4
+  %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+  store i32 98, i32 addrspace(1)* %idx4, align 4
+  %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+  store i32 91, i32 addrspace(1)* %idx5, align 4
+  %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+  store i32 212, i32 addrspace(1)* %idx6, align 4
+  %idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
+  store i32 999, i32 addrspace(1)* %idx7, align 4
+  ret void
+}
+
 declare void @llvm.AMDGPU.barrier.local() #1
 
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/AMDGPU/min.ll
index 0332d1a8e407..0332d1a8e407 100644
--- a/test/CodeGen/R600/min.ll
+++ b/test/CodeGen/AMDGPU/min.ll
diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/AMDGPU/min3.ll
index 38ef46d1bdd6..38ef46d1bdd6 100644
--- a/test/CodeGen/R600/min3.ll
+++ b/test/CodeGen/AMDGPU/min3.ll
diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/AMDGPU/missing-store.ll
index 4af9cdf1b960..4af9cdf1b960 100644
--- a/test/CodeGen/R600/missing-store.ll
+++ b/test/CodeGen/AMDGPU/missing-store.ll
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/AMDGPU/mubuf.ll
index b19163f294e0..b19163f294e0 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/AMDGPU/mubuf.ll
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/AMDGPU/mul.ll
index 94e0f96b323e..94e0f96b323e 100644
--- a/test/CodeGen/R600/mul.ll
+++
b/test/CodeGen/AMDGPU/mul.ll diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/AMDGPU/mul_int24.ll index 7609dcc87afa..7609dcc87afa 100644 --- a/test/CodeGen/R600/mul_int24.ll +++ b/test/CodeGen/AMDGPU/mul_int24.ll diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/AMDGPU/mul_uint24.ll index e640a7cd69f6..e640a7cd69f6 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/AMDGPU/mul_uint24.ll diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/AMDGPU/mulhu.ll index 29b0944a5533..29b0944a5533 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/AMDGPU/mulhu.ll diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll index 9a814b579deb..9a814b579deb 100644 --- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll +++ b/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/AMDGPU/no-shrink-extloads.ll index e4328ecbaca8..e4328ecbaca8 100644 --- a/test/CodeGen/R600/no-shrink-extloads.ll +++ b/test/CodeGen/AMDGPU/no-shrink-extloads.ll diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/AMDGPU/operand-folding.ll index 816755efb07c..816755efb07c 100644 --- a/test/CodeGen/R600/operand-folding.ll +++ b/test/CodeGen/AMDGPU/operand-folding.ll diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/AMDGPU/operand-spacing.ll index 20420a84de6f..20420a84de6f 100644 --- a/test/CodeGen/R600/operand-spacing.ll +++ b/test/CodeGen/AMDGPU/operand-spacing.ll diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/AMDGPU/or.ll index 1c04090b407f..1c04090b407f 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/AMDGPU/or.ll diff --git a/test/CodeGen/R600/packetizer.ll b/test/CodeGen/AMDGPU/packetizer.ll index 49a7c0df748f..49a7c0df748f 100644 --- a/test/CodeGen/R600/packetizer.ll +++ b/test/CodeGen/AMDGPU/packetizer.ll diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/AMDGPU/parallelandifcollapse.ll index f32b044198ab..f32b044198ab 100644 --- a/test/CodeGen/R600/parallelandifcollapse.ll +++ b/test/CodeGen/AMDGPU/parallelandifcollapse.ll diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/AMDGPU/parallelorifcollapse.ll index 1da1e91b8ab8..1da1e91b8ab8 100644 --- a/test/CodeGen/R600/parallelorifcollapse.ll +++ b/test/CodeGen/AMDGPU/parallelorifcollapse.ll diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/AMDGPU/predicate-dp4.ll index 6bc187594359..6bc187594359 100644 --- a/test/CodeGen/R600/predicate-dp4.ll +++ b/test/CodeGen/AMDGPU/predicate-dp4.ll diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/AMDGPU/predicates.ll index 0ce74d97ba8e..0ce74d97ba8e 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/AMDGPU/predicates.ll diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/AMDGPU/private-memory-atomics.ll index a008ac98a43b..a008ac98a43b 100644 --- a/test/CodeGen/R600/private-memory-atomics.ll +++ b/test/CodeGen/AMDGPU/private-memory-atomics.ll diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/AMDGPU/private-memory-broken.ll index 6b18a19f1956..6b18a19f1956 100644 --- a/test/CodeGen/R600/private-memory-broken.ll +++ b/test/CodeGen/AMDGPU/private-memory-broken.ll diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll index 1c5629780508..1c5629780508 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/AMDGPU/private-memory.ll diff --git 
a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/AMDGPU/pv-packing.ll index abeae563ff3f..abeae563ff3f 100644 --- a/test/CodeGen/R600/pv-packing.ll +++ b/test/CodeGen/AMDGPU/pv-packing.ll diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/AMDGPU/pv.ll index 9a57dd19765a..9a57dd19765a 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/AMDGPU/pv.ll diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/AMDGPU/r600-encoding.ll index 3a82ee30a328..3a82ee30a328 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/AMDGPU/r600-encoding.ll diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/AMDGPU/r600-export-fix.ll index 7cb80195b368..7cb80195b368 100644 --- a/test/CodeGen/R600/r600-export-fix.ll +++ b/test/CodeGen/AMDGPU/r600-export-fix.ll diff --git a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll b/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll index f388f8ffe293..f388f8ffe293 100644 --- a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll +++ b/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/AMDGPU/r600cfg.ll index c7b9d65220f3..c7b9d65220f3 100644 --- a/test/CodeGen/R600/r600cfg.ll +++ b/test/CodeGen/AMDGPU/r600cfg.ll diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/AMDGPU/reciprocal.ll index b4ac47afced7..b4ac47afced7 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/AMDGPU/reciprocal.ll diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/AMDGPU/register-count-comments.ll index de6bfb310883..de6bfb310883 100644 --- a/test/CodeGen/R600/register-count-comments.ll +++ b/test/CodeGen/AMDGPU/register-count-comments.ll diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/AMDGPU/reorder-stores.ll index 187650ff9a53..187650ff9a53 100644 --- a/test/CodeGen/R600/reorder-stores.ll +++ b/test/CodeGen/AMDGPU/reorder-stores.ll diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/AMDGPU/rotl.i64.ll index 3f4ceb7e0310..3f4ceb7e0310 100644 --- a/test/CodeGen/R600/rotl.i64.ll +++ b/test/CodeGen/AMDGPU/rotl.i64.ll diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/AMDGPU/rotl.ll index 6c144cd56ea7..6c144cd56ea7 100644 --- a/test/CodeGen/R600/rotl.ll +++ b/test/CodeGen/AMDGPU/rotl.ll diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/AMDGPU/rotr.i64.ll index 586de44a566c..586de44a566c 100644 --- a/test/CodeGen/R600/rotr.i64.ll +++ b/test/CodeGen/AMDGPU/rotr.i64.ll diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/AMDGPU/rotr.ll index 044f9ffe6d63..044f9ffe6d63 100644 --- a/test/CodeGen/R600/rotr.ll +++ b/test/CodeGen/AMDGPU/rotr.ll diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/AMDGPU/rsq.ll index b67b800c7374..b67b800c7374 100644 --- a/test/CodeGen/R600/rsq.ll +++ b/test/CodeGen/AMDGPU/rsq.ll diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/AMDGPU/rv7x0_count3.ll index c3fd923e4593..c3fd923e4593 100644 --- a/test/CodeGen/R600/rv7x0_count3.ll +++ b/test/CodeGen/AMDGPU/rv7x0_count3.ll diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/AMDGPU/s_movk_i32.ll index 6b1a36c979c2..6b1a36c979c2 100644 --- a/test/CodeGen/R600/s_movk_i32.ll +++ b/test/CodeGen/AMDGPU/s_movk_i32.ll diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/AMDGPU/saddo.ll index f8ced7942a60..f8ced7942a60 100644 --- a/test/CodeGen/R600/saddo.ll +++ b/test/CodeGen/AMDGPU/saddo.ll diff --git a/test/CodeGen/R600/salu-to-valu.ll 
b/test/CodeGen/AMDGPU/salu-to-valu.ll index 0b9649576545..0b9649576545 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/AMDGPU/scalar_to_vector.ll index 0970e5d30630..0970e5d30630 100644 --- a/test/CodeGen/R600/scalar_to_vector.ll +++ b/test/CodeGen/AMDGPU/scalar_to_vector.ll diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll index 11e8f5176f44..11e8f5176f44 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll +++ b/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll index 759197ca61f7..759197ca61f7 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested.ll +++ b/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/AMDGPU/schedule-fs-loop.ll index 28cc08abc022..28cc08abc022 100644 --- a/test/CodeGen/R600/schedule-fs-loop.ll +++ b/test/CodeGen/AMDGPU/schedule-fs-loop.ll diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/AMDGPU/schedule-global-loads.ll index 3f728fd873b3..3f728fd873b3 100644 --- a/test/CodeGen/R600/schedule-global-loads.ll +++ b/test/CodeGen/AMDGPU/schedule-global-loads.ll diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/AMDGPU/schedule-if-2.ll index 549465096833..549465096833 100644 --- a/test/CodeGen/R600/schedule-if-2.ll +++ b/test/CodeGen/AMDGPU/schedule-if-2.ll diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/AMDGPU/schedule-if.ll index 94c653c8f25b..94c653c8f25b 100644 --- a/test/CodeGen/R600/schedule-if.ll +++ b/test/CodeGen/AMDGPU/schedule-if.ll diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll index 6b3e0814c380..6b3e0814c380 100644 --- a/test/CodeGen/R600/schedule-kernel-arg-loads.ll +++ b/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll index 3863afda5dd3..3863afda5dd3 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll index 8d980dbf8995..8d980dbf8995 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll +++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll index 56088718ada8..56088718ada8 100644 --- a/test/CodeGen/R600/scratch-buffer.ll +++ b/test/CodeGen/AMDGPU/scratch-buffer.ll diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/AMDGPU/sdiv.ll index de645353a401..de645353a401 100644 --- a/test/CodeGen/R600/sdiv.ll +++ b/test/CodeGen/AMDGPU/sdiv.ll diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/AMDGPU/sdivrem24.ll index ad5df39f5505..ad5df39f5505 100644 --- a/test/CodeGen/R600/sdivrem24.ll +++ b/test/CodeGen/AMDGPU/sdivrem24.ll diff --git a/test/CodeGen/R600/sdivrem64.ll b/test/CodeGen/AMDGPU/sdivrem64.ll index a9b2b7f9df55..a9b2b7f9df55 100644 --- a/test/CodeGen/R600/sdivrem64.ll +++ b/test/CodeGen/AMDGPU/sdivrem64.ll diff --git a/test/CodeGen/R600/select-i1.ll b/test/CodeGen/AMDGPU/select-i1.ll index 6735394e93a9..6735394e93a9 100644 --- a/test/CodeGen/R600/select-i1.ll +++ 
b/test/CodeGen/AMDGPU/select-i1.ll diff --git a/test/CodeGen/R600/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll index 59082c65cc8a..59082c65cc8a 100644 --- a/test/CodeGen/R600/select-vectors.ll +++ b/test/CodeGen/AMDGPU/select-vectors.ll diff --git a/test/CodeGen/R600/select.ll b/test/CodeGen/AMDGPU/select.ll index 45f3cd5a7ac5..45f3cd5a7ac5 100644 --- a/test/CodeGen/R600/select.ll +++ b/test/CodeGen/AMDGPU/select.ll diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/AMDGPU/select64.ll index 5cebb30dc72e..5cebb30dc72e 100644 --- a/test/CodeGen/R600/select64.ll +++ b/test/CodeGen/AMDGPU/select64.ll diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/AMDGPU/selectcc-cnd.ll index 94d0ace75697..94d0ace75697 100644 --- a/test/CodeGen/R600/selectcc-cnd.ll +++ b/test/CodeGen/AMDGPU/selectcc-cnd.ll diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/AMDGPU/selectcc-cnde-int.ll index 58a4ee7d62b2..58a4ee7d62b2 100644 --- a/test/CodeGen/R600/selectcc-cnde-int.ll +++ b/test/CodeGen/AMDGPU/selectcc-cnde-int.ll diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll index e870ee891e66..e870ee891e66 100644 --- a/test/CodeGen/R600/selectcc-icmp-select-float.ll +++ b/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/AMDGPU/selectcc-opt.ll index 65be4a626a18..65be4a626a18 100644 --- a/test/CodeGen/R600/selectcc-opt.ll +++ b/test/CodeGen/AMDGPU/selectcc-opt.ll diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/AMDGPU/selectcc.ll index f378e15dd763..f378e15dd763 100644 --- a/test/CodeGen/R600/selectcc.ll +++ b/test/CodeGen/AMDGPU/selectcc.ll diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 53694dcffa66..53694dcffa66 100644 --- a/test/CodeGen/R600/set-dx10.ll +++ b/test/CodeGen/AMDGPU/set-dx10.ll diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/AMDGPU/setcc-equivalent.ll index 11ea793650c4..11ea793650c4 100644 --- a/test/CodeGen/R600/setcc-equivalent.ll +++ b/test/CodeGen/AMDGPU/setcc-equivalent.ll diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/AMDGPU/setcc-opt.ll index 4e6a10d6b78d..4e6a10d6b78d 100644 --- a/test/CodeGen/R600/setcc-opt.ll +++ b/test/CodeGen/AMDGPU/setcc-opt.ll diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/AMDGPU/setcc.ll index f33a82df5ffb..f33a82df5ffb 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/AMDGPU/setcc.ll diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/AMDGPU/setcc64.ll index 231be7aa3da7..231be7aa3da7 100644 --- a/test/CodeGen/R600/setcc64.ll +++ b/test/CodeGen/AMDGPU/setcc64.ll diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/AMDGPU/seto.ll index 9b5d6b5dbd62..9b5d6b5dbd62 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/AMDGPU/seto.ll diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/AMDGPU/setuo.ll index 76346c4f624a..76346c4f624a 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/AMDGPU/setuo.ll diff --git a/test/CodeGen/R600/sext-eliminate.ll b/test/CodeGen/AMDGPU/sext-eliminate.ll index 7dc6eb87f6b5..7dc6eb87f6b5 100644 --- a/test/CodeGen/R600/sext-eliminate.ll +++ b/test/CodeGen/AMDGPU/sext-eliminate.ll diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 5aedda2ce1a9..5aedda2ce1a9 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll diff --git a/test/CodeGen/R600/sgpr-control-flow.ll 
b/test/CodeGen/AMDGPU/sgpr-control-flow.ll index 38289ced632a..38289ced632a 100644 --- a/test/CodeGen/R600/sgpr-control-flow.ll +++ b/test/CodeGen/AMDGPU/sgpr-control-flow.ll diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll index df67fcca22fe..df67fcca22fe 100644 --- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll +++ b/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll index b849c4038bc7..b849c4038bc7 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/AMDGPU/sgpr-copy.ll diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/AMDGPU/shared-op-cycle.ll index f52a9baf4d18..f52a9baf4d18 100644 --- a/test/CodeGen/R600/shared-op-cycle.ll +++ b/test/CodeGen/AMDGPU/shared-op-cycle.ll diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/AMDGPU/shl.ll index 53b63dc4b8ad..53b63dc4b8ad 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/AMDGPU/shl_add_constant.ll index b1485bfaaebb..b1485bfaaebb 100644 --- a/test/CodeGen/R600/shl_add_constant.ll +++ b/test/CodeGen/AMDGPU/shl_add_constant.ll diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/AMDGPU/shl_add_ptr.ll index 6671e909cd1d..6671e909cd1d 100644 --- a/test/CodeGen/R600/shl_add_ptr.ll +++ b/test/CodeGen/AMDGPU/shl_add_ptr.ll diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/AMDGPU/si-annotate-cf-assertion.ll index 69d719385acd..69d719385acd 100644 --- a/test/CodeGen/R600/si-annotate-cf-assertion.ll +++ b/test/CodeGen/AMDGPU/si-annotate-cf-assertion.ll diff --git a/test/CodeGen/R600/si-annotate-cf.ll b/test/CodeGen/AMDGPU/si-annotate-cf.ll index bbcb861f37dc..bbcb861f37dc 100644 --- a/test/CodeGen/R600/si-annotate-cf.ll +++ b/test/CodeGen/AMDGPU/si-annotate-cf.ll diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/AMDGPU/si-lod-bias.ll index 944499a11461..944499a11461 100644 --- a/test/CodeGen/R600/si-lod-bias.ll +++ b/test/CodeGen/AMDGPU/si-lod-bias.ll diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll index 84652701f773..84652701f773 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll +++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll diff --git a/test/CodeGen/R600/si-spill-cf.ll b/test/CodeGen/AMDGPU/si-spill-cf.ll index 4b2d8ec6bf0a..4b2d8ec6bf0a 100644 --- a/test/CodeGen/R600/si-spill-cf.ll +++ b/test/CodeGen/AMDGPU/si-spill-cf.ll diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 5a6129aaa3fa..5a6129aaa3fa 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/AMDGPU/si-vector-hang.ll index bd427dd3ed46..bd427dd3ed46 100644 --- a/test/CodeGen/R600/si-vector-hang.ll +++ b/test/CodeGen/AMDGPU/si-vector-hang.ll diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/AMDGPU/sign_extend.ll index 06bee114c23a..06bee114c23a 100644 --- a/test/CodeGen/R600/sign_extend.ll +++ b/test/CodeGen/AMDGPU/sign_extend.ll diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/AMDGPU/simplify-demanded-bits-build-pair.ll index dffee70b6b02..dffee70b6b02 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/AMDGPU/simplify-demanded-bits-build-pair.ll diff --git 
a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index da4e91db3a38..da4e91db3a38 100644 --- a/test/CodeGen/R600/sint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/AMDGPU/sint_to_fp.ll index 8506441d1361..8506441d1361 100644 --- a/test/CodeGen/R600/sint_to_fp.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.ll diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll index b0c18ca5959c..b0c18ca5959c 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/AMDGPU/smrd.ll diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll index 46409cdfae1c..46409cdfae1c 100644 --- a/test/CodeGen/R600/split-scalar-i64-add.ll +++ b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/AMDGPU/sra.ll index bcbc32f4c053..bcbc32f4c053 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/AMDGPU/srem.ll index c78fd549b316..c78fd549b316 100644 --- a/test/CodeGen/R600/srem.ll +++ b/test/CodeGen/AMDGPU/srem.ll diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/AMDGPU/srl.ll index 4904d7fa1bd0..4904d7fa1bd0 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/AMDGPU/srl.ll diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/AMDGPU/ssubo.ll index 26884a1b7761..26884a1b7761 100644 --- a/test/CodeGen/R600/ssubo.ll +++ b/test/CodeGen/AMDGPU/ssubo.ll diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/AMDGPU/store-barrier.ll index 4a72b4d090ad..4a72b4d090ad 100644 --- a/test/CodeGen/R600/store-barrier.ll +++ b/test/CodeGen/AMDGPU/store-barrier.ll diff --git a/test/CodeGen/R600/store-v3i32.ll b/test/CodeGen/AMDGPU/store-v3i32.ll index 33617b55ed64..33617b55ed64 100644 --- a/test/CodeGen/R600/store-v3i32.ll +++ b/test/CodeGen/AMDGPU/store-v3i32.ll diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/AMDGPU/store-v3i64.ll index e0c554ad2c17..e0c554ad2c17 100644 --- a/test/CodeGen/R600/store-v3i64.ll +++ b/test/CodeGen/AMDGPU/store-v3i64.ll diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/AMDGPU/store-vector-ptrs.ll index d5af3b29118a..d5af3b29118a 100644 --- a/test/CodeGen/R600/store-vector-ptrs.ll +++ b/test/CodeGen/AMDGPU/store-vector-ptrs.ll diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/AMDGPU/store.ll index 0f89405e073b..0f89405e073b 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/AMDGPU/store.ll diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/AMDGPU/store.r600.ll index 696fb033b5ec..696fb033b5ec 100644 --- a/test/CodeGen/R600/store.r600.ll +++ b/test/CodeGen/AMDGPU/store.r600.ll diff --git a/test/CodeGen/R600/structurize.ll b/test/CodeGen/AMDGPU/structurize.ll index 02e592e9a559..02e592e9a559 100644 --- a/test/CodeGen/R600/structurize.ll +++ b/test/CodeGen/AMDGPU/structurize.ll diff --git a/test/CodeGen/R600/structurize1.ll b/test/CodeGen/AMDGPU/structurize1.ll index 77432c1f9d2b..77432c1f9d2b 100644 --- a/test/CodeGen/R600/structurize1.ll +++ b/test/CodeGen/AMDGPU/structurize1.ll diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/AMDGPU/sub.ll index b7fba0efa5b2..b7fba0efa5b2 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/AMDGPU/sub.ll diff --git a/test/CodeGen/R600/subreg-coalescer-crash.ll b/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll index c4dae4736cfa..c4dae4736cfa 100644 --- a/test/CodeGen/R600/subreg-coalescer-crash.ll +++ 
b/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll diff --git a/test/CodeGen/R600/subreg-eliminate-dead.ll b/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll index 8bd995a8ecbb..8bd995a8ecbb 100644 --- a/test/CodeGen/R600/subreg-eliminate-dead.ll +++ b/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/AMDGPU/swizzle-export.ll index 000ee2faa478..000ee2faa478 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/AMDGPU/swizzle-export.ll diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/AMDGPU/tex-clause-antidep.ll index cbb9c50974a4..cbb9c50974a4 100644 --- a/test/CodeGen/R600/tex-clause-antidep.ll +++ b/test/CodeGen/AMDGPU/tex-clause-antidep.ll diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/AMDGPU/texture-input-merge.ll index 789538af5821..789538af5821 100644 --- a/test/CodeGen/R600/texture-input-merge.ll +++ b/test/CodeGen/AMDGPU/texture-input-merge.ll diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/AMDGPU/trunc-cmp-constant.ll index dac74728b3ce..dac74728b3ce 100644 --- a/test/CodeGen/R600/trunc-cmp-constant.ll +++ b/test/CodeGen/AMDGPU/trunc-cmp-constant.ll diff --git a/test/CodeGen/R600/trunc-store-f64-to-f16.ll b/test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll index c29872beef86..c29872beef86 100644 --- a/test/CodeGen/R600/trunc-store-f64-to-f16.ll +++ b/test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/AMDGPU/trunc-store-i1.ll index b71a838b62cd..b71a838b62cd 100644 --- a/test/CodeGen/R600/trunc-store-i1.ll +++ b/test/CodeGen/AMDGPU/trunc-store-i1.ll diff --git a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll b/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll index 878ea3f48995..878ea3f48995 100644 --- a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll +++ b/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll index bf690ca4cb28..bf690ca4cb28 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/AMDGPU/trunc.ll diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/AMDGPU/tti-unroll-prefs.ll index 76c32afc1f21..76c32afc1f21 100644 --- a/test/CodeGen/R600/tti-unroll-prefs.ll +++ b/test/CodeGen/AMDGPU/tti-unroll-prefs.ll diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/AMDGPU/uaddo.ll index 11438f267ad0..11438f267ad0 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/AMDGPU/uaddo.ll diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/AMDGPU/udiv.ll index de22a22e5029..de22a22e5029 100644 --- a/test/CodeGen/R600/udiv.ll +++ b/test/CodeGen/AMDGPU/udiv.ll diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/AMDGPU/udivrem.ll index b3837f28209a..b3837f28209a 100644 --- a/test/CodeGen/R600/udivrem.ll +++ b/test/CodeGen/AMDGPU/udivrem.ll diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/AMDGPU/udivrem24.ll index 4de881b66f10..4de881b66f10 100644 --- a/test/CodeGen/R600/udivrem24.ll +++ b/test/CodeGen/AMDGPU/udivrem24.ll diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/AMDGPU/udivrem64.ll index 9f3069bdf80c..9f3069bdf80c 100644 --- a/test/CodeGen/R600/udivrem64.ll +++ b/test/CodeGen/AMDGPU/udivrem64.ll diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index dfec8eb15cb7..dfec8eb15cb7 100644 --- a/test/CodeGen/R600/uint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll diff --git 
a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/AMDGPU/uint_to_fp.ll index 00fea80b1bc8..00fea80b1bc8 100644 --- a/test/CodeGen/R600/uint_to_fp.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.ll diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/AMDGPU/unaligned-load-store.ll index 82d88ebd3ae7..82d88ebd3ae7 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/AMDGPU/unaligned-load-store.ll diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll index 036a7e91b47f..036a7e91b47f 100644 --- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll +++ b/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll diff --git a/test/CodeGen/R600/unroll.ll b/test/CodeGen/AMDGPU/unroll.ll index 411a15a4b839..411a15a4b839 100644 --- a/test/CodeGen/R600/unroll.ll +++ b/test/CodeGen/AMDGPU/unroll.ll diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll index 8ab4faf2f145..8ab4faf2f145 100644 --- a/test/CodeGen/R600/unsupported-cc.ll +++ b/test/CodeGen/AMDGPU/unsupported-cc.ll diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/AMDGPU/urecip.ll index daacc771708a..daacc771708a 100644 --- a/test/CodeGen/R600/urecip.ll +++ b/test/CodeGen/AMDGPU/urecip.ll diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/AMDGPU/urem.ll index 62841ec2d6c5..62841ec2d6c5 100644 --- a/test/CodeGen/R600/urem.ll +++ b/test/CodeGen/AMDGPU/urem.ll diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll index f26f30022b4f..f26f30022b4f 100644 --- a/test/CodeGen/R600/use-sgpr-multiple-times.ll +++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/AMDGPU/usubo.ll index 3c9b1622a076..3c9b1622a076 100644 --- a/test/CodeGen/R600/usubo.ll +++ b/test/CodeGen/AMDGPU/usubo.ll diff --git a/test/CodeGen/R600/v1i64-kernel-arg.ll b/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll index 31755125c03b..31755125c03b 100644 --- a/test/CodeGen/R600/v1i64-kernel-arg.ll +++ b/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/AMDGPU/v_cndmask.ll index c368c5aaf7dc..c368c5aaf7dc 100644 --- a/test/CodeGen/R600/v_cndmask.ll +++ b/test/CodeGen/AMDGPU/v_cndmask.ll diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index 7d0ebd139f51..7d0ebd139f51 100644 --- a/test/CodeGen/R600/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/AMDGPU/vector-alloca.ll index 6f3b4847fbdf..6f3b4847fbdf 100644 --- a/test/CodeGen/R600/vector-alloca.ll +++ b/test/CodeGen/AMDGPU/vector-alloca.ll diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll index fb6a17e67146..fb6a17e67146 100644 --- a/test/CodeGen/R600/vertex-fetch-encoding.ll +++ b/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/AMDGPU/vop-shrink.ll index 9b2f229c05af..9b2f229c05af 100644 --- a/test/CodeGen/R600/vop-shrink.ll +++ b/test/CodeGen/AMDGPU/vop-shrink.ll diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/AMDGPU/vselect.ll index a3014b03d2b3..a3014b03d2b3 100644 --- a/test/CodeGen/R600/vselect.ll +++ b/test/CodeGen/AMDGPU/vselect.ll diff --git a/test/CodeGen/R600/vselect64.ll b/test/CodeGen/AMDGPU/vselect64.ll index ef85ebe7899f..ef85ebe7899f 100644 --- a/test/CodeGen/R600/vselect64.ll +++ b/test/CodeGen/AMDGPU/vselect64.ll diff 
--git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/AMDGPU/vtx-fetch-branch.ll
index 4584d6e25254..4584d6e25254 100644
--- a/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/test/CodeGen/AMDGPU/vtx-fetch-branch.ll
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/AMDGPU/vtx-schedule.ll
index 912e258ebb83..912e258ebb83 100644
--- a/test/CodeGen/R600/vtx-schedule.ll
+++ b/test/CodeGen/AMDGPU/vtx-schedule.ll
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/AMDGPU/wait.ll
index 5cc7577cad33..5cc7577cad33 100644
--- a/test/CodeGen/R600/wait.ll
+++ b/test/CodeGen/AMDGPU/wait.ll
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll
index 4328e964c1bf..4328e964c1bf 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
index 8b383e4c393d..8b383e4c393d 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/AMDGPU/xor.ll
index 089db59eabc7..089db59eabc7 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/AMDGPU/xor.ll
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll
index 033055db185a..033055db185a 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/AMDGPU/zero_extend.ll
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 2f6e428351d7..d402c16ccacb 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -7,7 +7,7 @@
 
 %struct.A = type { i32* }
 
-define void @"\01-[MyFunction Name:]"() {
+define void @"\01-[MyFunction Name:]"() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %save_filt.1 = alloca i32
   %save_eptr.0 = alloca i8*
@@ -39,7 +39,7 @@ return: ; preds = %invcont
   ret void
 
 lpad: ; preds = %entry
-  %exn = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn = landingpad {i8*, i32}
           cleanup
   %eh_ptr = extractvalue {i8*, i32} %exn, 0
   store i8* %eh_ptr, i8** %eh_exception
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index b02efea929fa..a876d998e750 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -40,7 +40,7 @@ entry:
 
 declare void @__cxa_throw(i8*, i8*, i8*)
 
-define i32 @main() ssp {
+define i32 @main() ssp personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
   %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2]
@@ -71,7 +71,7 @@ try.cont: ; preds = %lpad
   ret i32 %conv
 
 lpad: ; preds = %entry
-  %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn.ptr = landingpad { i8*, i32 }
           catch i8* bitcast (%0* @_ZTI1A to i8*)
           catch i8* null
   %exn = extractvalue { i8*, i32 } %exn.ptr, 0
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index 13214c521530..69482cc8b35b 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
 ; <rdar://problem/8264008>
 
-define linkonce_odr arm_apcscc void @func1() {
+define linkonce_odr arm_apcscc void @func1() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %save_filt.936 = alloca i32 ; <i32*> [#uses=2]
   %save_eptr.935 = alloca i8* ; <i8**> [#uses=2]
@@ -34,7 +34,7 @@ return: ; preds = %entry
   ret void
 
 lpad: ; preds = %bb
-  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %eh_ptr = landingpad { i8*, i32 }
           cleanup
   %exn = extractvalue { i8*, i32 } %eh_ptr, 0
   store i8* %exn, i8** %eh_exception
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index 2af3e3e6bd4c..559b027fb115 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin"
 
-define void @func() unnamed_addr align 2 {
+define void @func() unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   br label %for.cond
 
@@ -35,13 +35,13 @@ for.cond.backedge:
   br label %for.cond
 
 lpad:
-  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn = landingpad { i8*, i32 }
           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
 lpad26:
-  %exn27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn27 = landingpad { i8*, i32 }
          catch i8* null
   invoke void @foo()
          to label %eh.resume unwind label %terminate.lpad
 
@@ -57,7 +57,7 @@ call8.i.i.i.noexc:
   ret void
 
 lpad44:
-  %exn45 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn45 = landingpad { i8*, i32 }
          catch i8* null
   invoke void @foo()
          to label %eh.resume unwind label %terminate.lpad
@@ -67,7 +67,7 @@ eh.resume:
   resume { i8*, i32 } %exn.slot.0
 
 terminate.lpad:
-  %exn51 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn51 = landingpad { i8*, i32 }
          catch i8* null
   tail call void @_ZSt9terminatev() noreturn nounwind
   unreachable
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
index 40d1f628aaae..b00cc51d9842 100644
--- a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -8,7 +8,7 @@
 %0 = type opaque
 %struct.NSConstantString = type { i32*, i32, i8*, i32 }
 
-define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) {
 bb:
   %tmp = alloca i32, align 4
   %tmp1 = alloca i32, align 4
@@ -37,7 +37,7 @@ bb14: ; preds = %bb11
   unreachable
 
 bb15: ; preds = %bb11, %bb
-  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+  %tmp16 = landingpad { i8*, i32 }
           catch i8* null
   %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
   store i8* %tmp17, i8** %tmp4
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
index 69d72bd83391..ce0dcc709522 100644
--- a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
+++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
@@ -25,13 +25,13 @@ declare void @__cxa_end_catch()
 
 declare void @_ZSt9terminatev()
 
-define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp {
+define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %call = invoke double undef(%class.FunctionInterpreter.3.15.31* undef) optsize
           to label %try.cont unwind label %lpad
 
 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* bitcast ({ i8*, i8* }* @_ZTI13ParseErrorMsg to i8*)
   br i1 undef, label %catch, label %eh.resume
 
@@ -47,7 +47,7 @@ try.cont: ; preds = %invoke.cont2, %entr
   ret double %value.0
 
 lpad1: ; preds = %catch
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %1 = landingpad { i8*, i32 }
          cleanup
   invoke void @__cxa_end_catch()
          to label %eh.resume unwind label %terminate.lpad
@@ -56,7 +56,7 @@ eh.resume: ; preds = %lpad1, %lpad
   resume { i8*, i32 } undef
 
 terminate.lpad: ; preds = %lpad1
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %2 = landingpad { i8*, i32 }
          catch i8* null
   unreachable
 }
diff --git a/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll b/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll
index 1e40e4afe5c0..feae48646cd5 100644
--- a/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll
+++ b/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll
@@ -8,13 +8,13 @@ target triple = "armv4t--linux-androideabi"
 
 @_ZTIi = external constant i8*
 
-define void @_Z3fn2v() #0 {
+define void @_Z3fn2v() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z3fn1v()
          to label %try.cont unwind label %lpad
 
 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* bitcast (i8** @_ZTIi to i8*)
   %1 = extractvalue { i8*, i32 } %0, 1
   %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll
index 754a16d90877..ef19d24d7d49 100644
--- a/test/CodeGen/ARM/arm-ttype-target2.ll
+++ b/test/CodeGen/ARM/arm-ttype-target2.ll
@@ -4,13 +4,13 @@
 @_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00"
 @_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @_ZTS3Foo, i32 0, i32 0) }
 
-define i32 @main() {
+define i32 @main() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z3foov()
          to label %return unwind label %lpad
 
 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)
   %1 = extractvalue { i8*, i32 } %0, 1
   %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)) nounwind
diff --git a/test/CodeGen/ARM/big-endian-eh-unwind.ll b/test/CodeGen/ARM/big-endian-eh-unwind.ll
index 630dfed4467c..7df5f30570ef 100644
--- a/test/CodeGen/ARM/big-endian-eh-unwind.ll
+++ b/test/CodeGen/ARM/big-endian-eh-unwind.ll
@@ -14,13 +14,13 @@
 ; }
 ;}
 
-define void @_Z4testii(i32 %a, i32 %b) #0 {
+define void @_Z4testii(i32 %a, i32 %b) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z3fooi(i32 %a)
          to label %try.cont unwind label %lpad
 
 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2
@@ -35,7 +35,7 @@ try.cont: ; preds = %entry, %invoke.cont
   ret void
 
 lpad1: ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %3 = landingpad { i8*, i32 }
          cleanup
   invoke void @__cxa_end_catch()
          to label %eh.resume unwind label %terminate.lpad
@@ -44,7 +44,7 @@ eh.resume: ; preds = %lpad1
   resume { i8*, i32 } %3
 
 terminate.lpad: ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %4 = landingpad { i8*, i32 }
          catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   tail call void @__clang_call_terminate(i8* %5) #3
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index e9de52a3e1a0..0cc4f230f284 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -103,8 +103,8 @@
 ; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-CHECK-FAST
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon,-fp16 | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon,-fp16 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST
@@ -436,7 +436,7 @@
 ; Tag_FP_HP_extension
 ; CORTEX-A7-CHECK: .eabi_attribute 36, 1
-; CORTEX-A7-NOFPU: .eabi_attribute 36, 1
+; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36
 ; CORTEX-A7-FPUV4: .eabi_attribute 36, 1
 
 ; Tag_FP_16bit_format
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
index 3b01d8113b9c..3b7a897e10c0 100644
--- a/test/CodeGen/ARM/crash.ll
+++ b/test/CodeGen/ARM/crash.ll
@@ -74,7 +74,7 @@ bb:
 %A = type { %B }
 %B = type { i32 }
 
-define void @_Z3Foov() ssp {
+define void @_Z3Foov() ssp personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   br i1 true, label %exit, label %false
 
@@ -83,7 +83,7 @@ false:
          to label %exit unwind label %lpad
 
 lpad:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* null
   unreachable
 
diff --git a/test/CodeGen/ARM/debug-frame-no-debug.ll b/test/CodeGen/ARM/debug-frame-no-debug.ll
index 81702c6e7491..8a07f261f41b 100644
--- a/test/CodeGen/ARM/debug-frame-no-debug.ll
+++ b/test/CodeGen/ARM/debug-frame-no-debug.ll
@@ -34,14 +34,13 @@ declare void @_Z5printddddd(double, double, double, double, double)
 
 define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
                                double %m, double %n, double %p,
-                               double %q, double %r) {
+                               double %q, double %r) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
           to label %try.cont unwind label %lpad
 
 lpad:
   %0 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1)
@@ -58,7 +57,6 @@ try.cont:
 
 lpad1:
   %3 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          cleanup
   invoke void @__cxa_end_catch()
          to label %eh.resume unwind label %terminate.lpad
@@ -68,7 +66,6 @@ eh.resume:
 
 terminate.lpad:
   %4 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   tail call void @__clang_call_terminate(i8* %5)
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
index 19e55fe02354..c1eff0a5bd67 100644
--- a/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple thumb-unknown-linux-gnueabi -filetype asm -o - %s -disable-fp-elim | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
 
 ; Tests that the initial space allocated to the varargs on the stack is
-; taken into account in the the .cfi_ directives.
+; taken into account in the .cfi_ directives.
 ; Generated from the C program:
 ; #include <stdarg.h>
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
index 134829254e3f..cc07400c2e1c 100644
--- a/test/CodeGen/ARM/debug-frame.ll
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -73,14 +73,13 @@ declare void @_Z5printddddd(double, double, double, double, double)
 
 define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
                                double %m, double %n, double %p,
-                               double %q, double %r) {
+                               double %q, double %r) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
           to label %try.cont unwind label %lpad
 
 lpad:
   %0 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1)
@@ -97,7 +96,6 @@ try.cont:
 
 lpad1:
   %3 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          cleanup
   invoke void @__cxa_end_catch()
          to label %eh.resume unwind label %terminate.lpad
@@ -107,7 +105,6 @@ eh.resume:
 
 terminate.lpad:
   %4 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   tail call void @__clang_call_terminate(i8* %5)
diff --git a/test/CodeGen/ARM/disable-tail-calls.ll b/test/CodeGen/ARM/disable-tail-calls.ll
new file mode 100644
index 000000000000..ab3731a839ab
--- /dev/null
+++ b/test/CodeGen/ARM/disable-tail-calls.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=arm-unknown-unknown | FileCheck %s --check-prefix=NO-OPTION
+; RUN: llc < %s -mtriple=arm-unknown-unknown -disable-tail-calls | FileCheck %s --check-prefix=DISABLE-TRUE
+; RUN: llc < %s -mtriple=arm-unknown-unknown -disable-tail-calls=false | FileCheck %s --check-prefix=DISABLE-FALSE
+
+; Check that command line option "-disable-tail-calls" overrides function
+; attribute "disable-tail-calls".
+
+; NO-OPTION-LABEL: {{\_?}}func_attr
+; NO-OPTION: bl {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_attr
+; DISABLE-FALSE: b {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_attr
+; DISABLE-TRUE: bl {{\_?}}callee
+
+define i32 @func_attr(i32 %a) #0 {
+entry:
+  %call = tail call i32 @callee(i32 %a)
+  ret i32 %call
+}
+
+; NO-OPTION-LABEL: {{\_?}}func_noattr
+; NO-OPTION: b {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_noattr
+; DISABLE-FALSE: b {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_noattr
+; DISABLE-TRUE: bl {{\_?}}callee
+
+define i32 @func_noattr(i32 %a) {
+entry:
+  %call = tail call i32 @callee(i32 %a)
+  ret i32 %call
+}
+
+declare i32 @callee(i32)
+
+attributes #0 = { "disable-tail-calls"="true" }
diff --git a/test/CodeGen/ARM/dwarf-eh.ll b/test/CodeGen/ARM/dwarf-eh.ll
index c890206b3532..68f8e95b5e73 100644
--- a/test/CodeGen/ARM/dwarf-eh.ll
+++ b/test/CodeGen/ARM/dwarf-eh.ll
@@ -17,7 +17,7 @@ target triple = "armv5e--netbsd-eabi"
 @_ZTS9exception = linkonce_odr constant [11 x i8] c"9exception\00"
 @_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9exception, i32 0, i32 0) }
 
-define void @f() uwtable {
+define void @f() uwtable personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 %1 = alloca i8*
 %2 = alloca i32
 %e = alloca %struct.exception*, align 4
@@ -26,7 +26,7 @@ define void @f() uwtable {
 
 br label %16
 
-%5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+%5 = landingpad { i8*, i32 }
         catch i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)
 %6 = extractvalue { i8*, i32 } %5, 0
 store i8* %6, i8** %1
diff --git a/test/CodeGen/ARM/eh-dispcont.ll b/test/CodeGen/ARM/eh-dispcont.ll
index 57ab15feca5e..e9871aa7dc77 100644
--- a/test/CodeGen/ARM/eh-dispcont.ll
+++ b/test/CodeGen/ARM/eh-dispcont.ll
@@ -7,7 +7,7 @@
 
 @_ZTIi = external constant i8*
 
-define i32 @main() #0 {
+define i32 @main() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %exception = tail call i8* @__cxa_allocate_exception(i32 4) #1
   %0 = bitcast i8* %exception to i32*
@@ -16,7 +16,7 @@ entry:
          to label %unreachable unwind label %lpad
 
 lpad: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %1 = landingpad { i8*, i32 }
          catch i8* null
   %2 = extractvalue { i8*, i32 } %1, 0
   %3 = tail call i8* @__cxa_begin_catch(i8* %2) #1
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
index d1252f4c9867..0cd49775cfb4 100644
--- a/test/CodeGen/ARM/eh-resume-darwin.ll
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -5,7 +5,7 @@ declare void @func()
 
 declare i32 @__gxx_personality_sj0(...)
-define void @test0() {
+define void @test0() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   invoke void @func()
          to label %cont unwind label %lpad
@@ -14,7 +14,7 @@ cont:
   ret void
 
 lpad:
-  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %exn = landingpad { i8*, i32 }
          cleanup
   resume { i8*, i32 } %exn
 }
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
index f86b66c30c5d..4faa29e20389 100644
--- a/test/CodeGen/ARM/ehabi-filters.ll
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -14,7 +14,7 @@ declare void @__cxa_throw(i8*, i8*, i8*)
 
 declare void @__cxa_call_unexpected(i8*)
 
-define i32 @main() {
+define i32 @main() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; CHECK-LABEL: main:
 entry:
   %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
@@ -24,7 +24,7 @@ entry:
          to label %unreachable.i unwind label %lpad.i
 
 lpad.i: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %1 = landingpad { i8*, i32 }
          filter [1 x i8*] [i8* bitcast (i8** @_ZTIi to i8*)]
          catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK: .long _ZTIi(target2) @ TypeInfo 1
@@ -45,7 +45,7 @@ unreachable.i: ; preds = %entry
   unreachable
 
 lpad: ; preds = %ehspec.unexpected.i
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %4 = landingpad { i8*, i32 }
          catch i8* bitcast (i8** @_ZTIi to i8*)
   br label %lpad.body
diff --git a/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll b/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll
index 42ca9888abbc..3d380bf8f22a 100644
--- a/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll
+++ b/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll
@@ -25,12 +25,12 @@ declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
 
-define void @test1() nounwind {
+define void @test1() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @throw_exception()
          to label %try.cont unwind label %lpad
 
 lpad:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1)
diff --git a/test/CodeGen/ARM/ehabi-handlerdata.ll b/test/CodeGen/ARM/ehabi-handlerdata.ll
index 7045902f99cd..c53b36ffe18f 100644
--- a/test/CodeGen/ARM/ehabi-handlerdata.ll
+++ b/test/CodeGen/ARM/ehabi-handlerdata.ll
@@ -23,12 +23,12 @@ declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
 
-define void @test1() {
+define void @test1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @throw_exception()
          to label %try.cont unwind label %lpad
 
 lpad:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
          catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1)
diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll
index 088e48d2d793..923cffcf6532 100644
--- a/test/CodeGen/ARM/ehabi.ll
+++ b/test/CodeGen/ARM/ehabi.ll
@@ -89,14 +89,13 @@ declare void @_Z5printddddd(double, double, double, double, double)
 
 define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
                                double %m, double %n, double %p,
-                               double %q, double %r) {
+                               double %q, double %r) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
           to label %try.cont unwind label %lpad
 
 lpad:
   %0 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = tail call i8* @__cxa_begin_catch(i8* %1)
@@ -113,7 +112,6 @@ try.cont:
 
 lpad1:
   %3 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          cleanup
   invoke void @__cxa_end_catch()
          to label %eh.resume unwind label %terminate.lpad
@@ -123,7 +121,6 @@ eh.resume:
 
 terminate.lpad:
   %4 = landingpad { i8*, i32 }
-          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
          catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   tail call void @__clang_call_terminate(i8* %5)
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index e8c8289098a7..fd06f1eeca74 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -15,13 +15,13 @@
 ; CHECK: ZTIi
 @_ZTIi = internal global i8* null
 
-define i32 @_Z9exceptioni(i32 %arg) {
+define i32 @_Z9exceptioni(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 bb:
   %tmp = invoke i32 @_Z14throwSomethingi(i32 %arg)
          to label %bb9 unwind label %bb1
 
 bb1: ; preds = %bb
-  %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %tmp2 = landingpad { i8*, i32 }
          catch i8* bitcast (i8** @_ZTIi to i8*)
   %tmp3 = extractvalue { i8*, i32 } %tmp2, 1
   %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
diff --git a/test/CodeGen/ARM/gv-stubs-crash.ll b/test/CodeGen/ARM/gv-stubs-crash.ll
index 6e82afeacf88..b1e6e4f7b178 100644
--- a/test/CodeGen/ARM/gv-stubs-crash.ll
+++ b/test/CodeGen/ARM/gv-stubs-crash.ll
@@ -3,7 +3,7 @@
 
 @Exn = external hidden unnamed_addr constant { i8*, i8* }
 
-define hidden void @func(i32* %this, i32* %e) optsize align 2 {
+define hidden void @func(i32* %this, i32* %e) optsize align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
   %e.ld = load i32, i32* %e, align 4
   %inv = invoke zeroext i1 @func2(i32* %this, i32 %e.ld) optsize
          to label %ret unwind label %lpad
@@ -12,7 +12,7 @@ ret:
   ret void
 
 lpad:
-  %lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %lp = landingpad { i8*, i32 }
          catch i8* bitcast ({ i8*, i8* }* @Exn to i8*)
   br label %.loopexit4
diff --git a/test/CodeGen/ARM/invoke-donothing-assert.ll b/test/CodeGen/ARM/invoke-donothing-assert.ll
index aab3556c5477..c6489e3a4ce5 100644
--- a/test/CodeGen/ARM/invoke-donothing-assert.ll
+++ b/test/CodeGen/ARM/invoke-donothing-assert.ll
@@ -4,7 +4,7 @@
 ; <rdar://problem/13228754> & <rdar://problem/13316637>
 
 ; CHECK: .globl _foo
-define void @foo() {
+define void @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 invoke.cont:
   invoke void @callA()
          to label %invoke.cont25 unwind label %lpad2
@@ -20,12 +20,12 @@ invoke.cont75:
   ret void
 
 lpad2:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %0 = landingpad { i8*, i32 }
          cleanup
   br label %eh.resume
 
 lpad15:
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %1 = landingpad { i8*, i32 }
          cleanup
   br label %eh.resume
 
@@ -34,7 +34,7 @@ eh.resume:
 }
 
 ; CHECK: .globl _bar
-define linkonce_odr void @bar(i32* %a) {
+define linkonce_odr void @bar(i32* %a) personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 if.end.i.i.i:
   invoke void @llvm.donothing()
          to label %call.i.i.i.noexc unwind label %eh.resume
@@ -58,7 +58,7 @@ _ZN3lol5ArrayIivvvvvvvED1Ev.exit:
   ret void
 
 eh.resume:
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %1 = landingpad { i8*, i32 }
          cleanup
   %2 = extractvalue { i8*, i32 } %1, 0
   %3 = extractvalue { i8*, i32 } %1, 1
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
index f85203e381b9..c7f47b0962dc 100644
--- a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -6,7 +6,7 @@
 
 declare void @bar(%struct.__CFString*, %struct.__CFString*)
 
-define noalias i8* @foo(i8* nocapture %inRefURL) noreturn ssp {
+define noalias i8* @foo(i8* nocapture %inRefURL) noreturn ssp personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %call = tail call %struct.__CFString* @bar3()
   %call2 = invoke i8* @bar2()
@@ -17,14 +17,14 @@ for.cond: ; preds = %entry, %for.cond
          to label %for.cond unwind label %lpad5
 
 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %0 = landingpad { i8*, i32 }
          cleanup
   %1 = extractvalue { i8*, i32 } %0, 0
   %2 = extractvalue { i8*, i32 } %0, 1
   br label %ehcleanup
 
 lpad5: ; preds = %for.cond
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %3 = landingpad { i8*, i32 }
          cleanup
   %4 = extractvalue { i8*, i32 } %3, 0
   %5 = extractvalue { i8*, i32 } %3, 1
@@ -32,7 +32,7 @@ lpad5: ; preds = %for.cond
          to label %ehcleanup unwind label %terminate.lpad.i.i16
 
 terminate.lpad.i.i16: ; preds = %lpad5
-  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %6 = landingpad { i8*, i32 }
          catch i8* null
   tail call void @terminatev() noreturn nounwind
   unreachable
@@ -45,7 +45,7 @@ ehcleanup: ; preds = %lpad5, %lpad
          to label %_ZN5SmartIPK10__CFStringED1Ev.exit unwind label %terminate.lpad.i.i
 
 terminate.lpad.i.i: ; preds = %ehcleanup
-  %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %8 = landingpad { i8*, i32 }
          catch i8* null
   tail call void @terminatev() noreturn nounwind
   unreachable
@@ -90,7 +90,7 @@ declare void @terminatev()
 
 @.str = private unnamed_addr constant [12 x i8] c"some_string\00", align 1
 
-define void @_Z4foo1c(i8 signext %a) {
+define void @_Z4foo1c(i8 signext %a) personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
 entry:
   %s1 = alloca %"class.std::__1::basic_string", align 4
   call void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEPKcm(%"class.std::__1::basic_string"* %s1, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 11)
@@ -131,14 +131,14 @@ invoke.cont6: ; preds = %_ZNSt3__113__vector
   ret void
 
 lpad.body: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %2 = landingpad { i8*, i32 }
          cleanup
   %3 = extractvalue { i8*, i32 } %2, 0
   %4 = extractvalue { i8*, i32 } %2, 1
   br label %ehcleanup
 
 lpad2: ; preds = %invoke.cont
-  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %5 = landingpad { i8*, i32 }
          cleanup
   %6 = extractvalue { i8*, i32 } %5, 0
   %7 = extractvalue { i8*, i32 } %5, 1
@@ -161,7 +161,7 @@ eh.resume: ; preds = %ehcleanup
   resume { i8*, i32 } %lpad.val13
 
 terminate.lpad: ; preds = %ehcleanup
-  %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+  %8 = landingpad { i8*, i32 }
          catch i8* null
   %9 = extractvalue { i8*, i32 } %8, 0
   call void @__clang_call_terminate(i8* %9)
diff --git a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
index 3cf2a08fe35d..5d015738623a 100644
--- a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
+++ b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
@@ -10,7 +10,7 @@
 ; __Unwind_SjLj_Register and actual @bar invocation
 
-define i8* @foo(i8 %a, {} %c) {
+define i8* @foo(i8 %a, {} %c) personality i8* bitcast (i32 (...)* @baz to i8*) {
 entry:
 ; CHECK: bl __Unwind_SjLj_Register
 ; CHECK-NEXT: {{[A-Z][a-zA-Z0-9]*}}:
@@ -22,7 +22,7 @@ unreachable:
   unreachable
 
 handler:
-  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @baz to i8*)
+  %tmp = landingpad { i8*, i32 }
          cleanup
   resume { i8*, i32 } undef
 }
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index caa5becac1d9..7b83dfdaf229 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -1,9 +1,14 @@
 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vtrni8:
-;CHECK: vtrn.8
-;CHECK-NEXT: vadd.i8
+; CHECK-LABEL: vtrni8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtrn.8 d17, d16
+; CHECK-NEXT: vadd.i8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -12,10 +17,30 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   ret <8 x i8> %tmp5
 }
 
+define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK-LABEL: vtrni8_Qres:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vtrn.8 d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <16 x i8> %tmp3
+}
+
 define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vtrni16:
-;CHECK: vtrn.16
-;CHECK-NEXT: vadd.i16
+; CHECK-LABEL: vtrni16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtrn.16 d17, d16
+; CHECK-NEXT: vadd.i16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <4 x i16>, <4 x i16>* %A
   %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -24,10 +49,30 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   ret <4 x i16> %tmp5
 }
 
+define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK-LABEL: vtrni16_Qres:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vtrn.16 d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
+  ret <8 x i16> %tmp3
+}
+
 define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vtrni32:
-;CHECK: vtrn.32
-;CHECK-NEXT: vadd.i32
+; CHECK-LABEL: vtrni32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtrn.32 d17, d16
+; CHECK-NEXT: vadd.i32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <2 x i32>, <2 x i32>* %A
   %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -36,10 +81,30 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   ret <2 x i32> %tmp5
 }
 
+define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK-LABEL: vtrni32_Qres:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vtrn.32 d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i32> %tmp3
+}
+
 define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: vtrnf:
-;CHECK: vtrn.32
-;CHECK-NEXT: vadd.f32
+; CHECK-LABEL: vtrnf:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtrn.32 d17, d16
+; CHECK-NEXT: vadd.f32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -48,10 +113,31 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
   ret <2 x float> %tmp5
 }
 
+define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
+; CHECK-LABEL: vtrnf_Qres:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vtrn.32 d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x float> %tmp3
+}
+
 define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vtrnQi8:
-;CHECK: vtrn.8
-;CHECK-NEXT: vadd.i8
+; CHECK-LABEL: vtrnQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vtrn.8 q9, q8
+; CHECK-NEXT: vadd.i8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <16 x i8>, <16 x i8>* %A
   %tmp2 = load <16 x i8>, <16 x i8>* %B
   %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -60,10 +146,31 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   ret <16 x i8> %tmp5
 }
 
+define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+; CHECK-LABEL: vtrnQi8_QQres:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vtrn.8 q9, q8
+; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + ret <32 x i8> %tmp3 +} + define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vtrnQi16: -;CHECK: vtrn.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vtrnQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtrn.16 q9, q8 +; CHECK-NEXT: vadd.i16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -72,10 +179,31 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ret <8 x i16> %tmp5 } +define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vtrnQi16_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vtrn.16 q9, q8 +; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <16 x i16> %tmp3 +} + define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vtrnQi32: -;CHECK: vtrn.32 -;CHECK-NEXT: vadd.i32 +; CHECK-LABEL: vtrnQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtrn.32 q9, q8 +; CHECK-NEXT: vadd.i32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -84,10 +212,31 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp5 } +define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vtrnQi32_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vtrn.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7> + ret <8 x i32> %tmp3 +} + define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vtrnQf: -;CHECK: vtrn.32 -;CHECK-NEXT: vadd.f32 +; CHECK-LABEL: vtrnQf: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtrn.32 q9, q8 +; CHECK-NEXT: vadd.f32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -96,12 +245,31 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp5 } -; Undef shuffle indices should not prevent matching to VTRN: +define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { +; CHECK-LABEL: vtrnQf_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vtrn.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7> + ret <8 x float> %tmp3 +} + define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vtrni8_undef: -;CHECK: vtrn.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vtrni8_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtrn.8 d17, d16 +; CHECK-NEXT: vadd.i8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> @@ -110,10 +278,31 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i8> %tmp5 } +define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vtrni8_undef_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vtrn.8 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> + ret <16 x i8> %tmp3 +} + define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vtrnQi16_undef: -;CHECK: vtrn.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vtrnQi16_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtrn.16 q9, q8 +; CHECK-NEXT: vadd.i16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, 
i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> @@ -122,3 +311,17 @@ define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ret <8 x i16> %tmp5 } +define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vtrnQi16_undef_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vtrn.16 q9, q8 +; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> + ret <16 x i16> %tmp3 +} diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 7a7306a26593..5510634b0668 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -1,9 +1,14 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vuzpi8: -;CHECK: vuzp.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vuzpi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vuzp.8 d17, d16 +; CHECK-NEXT: vadd.i8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> @@ -12,10 +17,30 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i8> %tmp5 } +define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vuzpi8_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vuzp.8 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <16 x i8> %tmp3 +} + define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vuzpi16: -;CHECK: vuzp.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vuzpi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vuzp.16 d17, d16 +; CHECK-NEXT: vadd.i16 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> @@ -24,12 +49,33 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i16> %tmp5 } +define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vuzpi16_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vuzp.16 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + ret <8 x i16> %tmp3 +} + ; VUZP.32 is equivalent to VTRN.32 for 
64-bit vectors. define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vuzpQi8: -;CHECK: vuzp.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vuzpQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vuzp.8 q9, q8 +; CHECK-NEXT: vadd.i8 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> @@ -38,10 +84,31 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ret <16 x i8> %tmp5 } +define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { +; CHECK-LABEL: vuzpQi8_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vuzp.8 q9, q8 +; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + ret <32 x i8> %tmp3 +} + define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vuzpQi16: -;CHECK: vuzp.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vuzpQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vuzp.16 q9, q8 +; CHECK-NEXT: vadd.i16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> @@ -50,10 +117,31 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ret <8 x i16> %tmp5 } +define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vuzpQi16_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vuzp.16 q9, q8 +; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <16 x i16> %tmp3 +} + define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vuzpQi32: -;CHECK: vuzp.32 -;CHECK-NEXT: vadd.i32 +; CHECK-LABEL: vuzpQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vuzp.32 q9, q8 +; CHECK-NEXT: vadd.i32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> @@ -62,10 +150,31 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp5 } +define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vuzpQi32_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vuzp.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + ret <8 x i32> %tmp3 +} + define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vuzpQf: -;CHECK: vuzp.32 -;CHECK-NEXT: vadd.f32 +; CHECK-LABEL: vuzpQf: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vuzp.32 q9, q8 +; CHECK-NEXT: vadd.f32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> @@ -74,12 +183,32 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp5 } +define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { +; CHECK-LABEL: vuzpQf_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vuzp.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + ret <8 x float> %tmp3 +} + ; Undef shuffle indices should not prevent matching to VUZP: define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vuzpi8_undef: -;CHECK: vuzp.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vuzpi8_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vuzp.8 d17, d16 +; CHECK-NEXT: vadd.i8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> @@ -88,10 +217,31 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i8> %tmp5 } +define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vuzpi8_undef_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vuzp.8 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> + ret <16 x i8> %tmp3 +} + define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vuzpQi16_undef: -;CHECK: vuzp.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vuzpQi16_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vuzp.16 q9, q8 +; CHECK-NEXT: vadd.i16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> @@ -100,3 +250,17 @@ define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ret <8 x i16> %tmp5 } +define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vuzpQi16_undef_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vuzp.16 q9, q8 +; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> + ret <16 x i16> %tmp3 +} diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index a1b5b4549ac2..1d9f59aeda0b 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -1,9 +1,14 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vzipi8: -;CHECK: vzip.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vzipi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vzip.8 d17, d16 +; CHECK-NEXT: vadd.i8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> @@ -12,10 +17,30 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i8> %tmp5 } +define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vzipi8_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vzip.8 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <16 x i8> %tmp3 +} + define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vzipi16: -;CHECK: vzip.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vzipi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vzip.16 d17, d16 +; CHECK-NEXT: vadd.i16 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -24,12 +49,33 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i16> %tmp5 } +define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vzipi16_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vzip.16 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + ret <8 x i16> %tmp3 +} + ; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors. 
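The "VUZP.32 is equivalent to VTRN.32" and "VZIP.32 is equivalent to VTRN.32" comments hold because a 64-bit vector has only two 32-bit lanes: zipping, unzipping, or transposing two 2-element vectors all pair lane 0 of the first input with lane 0 of the second (and lane 1 with lane 1), so a single vtrn.32 covers all three. A minimal sketch of why (the function name is illustrative, not part of the patch):
; Sketch only: for <2 x i32> the zip, uzp, and trn shuffle masks all
; reduce to <0, 2> (and <1, 3> for the second half), so the backend can
; emit vtrn.32 for any of the three operations.
define <2 x i32> @zip_uzp_trn_coincide(<2 x i32> %a, <2 x i32> %b) nounwind {
  %r = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %r
}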
define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vzipQi8: -;CHECK: vzip.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vzipQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vzip.8 q9, q8 +; CHECK-NEXT: vadd.i8 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> @@ -38,10 +84,31 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ret <16 x i8> %tmp5 } +define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { +; CHECK-LABEL: vzipQi8_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vzip.8 q9, q8 +; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <32 x i8> %tmp3 +} + define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vzipQi16: -;CHECK: vzip.16 -;CHECK-NEXT: vadd.i16 +; CHECK-LABEL: vzipQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vzip.16 q9, q8 +; CHECK-NEXT: vadd.i16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> @@ -50,10 +117,31 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ret <8 x i16> %tmp5 } +define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vzipQi16_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vzip.16 q9, q8 +; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <16 x i16> %tmp3 +} + define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vzipQi32: -;CHECK: vzip.32 -;CHECK-NEXT: vadd.i32 +; CHECK-LABEL: vzipQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vzip.32 q9, q8 +; CHECK-NEXT: vadd.i32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -62,10 +150,31 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp5 } +define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vzipQi32_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vzip.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + ret <8 x i32> %tmp3 +} + define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { -;CHECK-LABEL: vzipQf: -;CHECK: vzip.32 -;CHECK-NEXT: vadd.f32 +; CHECK-LABEL: vzipQf: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vzip.32 q9, q8 +; CHECK-NEXT: vadd.f32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -74,12 +183,32 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp5 } +define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { +; CHECK-LABEL: vzipQf_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vzip.32 q9, q8 +; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + ret <8 x float> %tmp3 +} + ; Undef shuffle indices should not prevent matching to VZIP: define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vzipi8_undef: -;CHECK: vzip.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vzipi8_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vzip.8 d17, d16 +; CHECK-NEXT: vadd.i8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> @@ -88,10 +217,31 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i8> %tmp5 } +define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vzipi8_undef_Qres: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vzip.8 d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> + ret <16 x i8> %tmp3 +} + define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vzipQi8_undef: -;CHECK: vzip.8 -;CHECK-NEXT: vadd.i8 +; CHECK-LABEL: vzipQi8_undef: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vzip.8 q9, q8 +; CHECK-NEXT: vadd.i8 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> @@ -100,3 +250,17 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ret <16 x i8> %tmp5 } +define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { +; CHECK-LABEL: vzipQi8_undef_QQres: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vzip.8 q9, q8 +; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: mov pc, lr + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> + ret <32 x i8> %tmp3 +} diff --git a/test/CodeGen/Generic/2007-02-25-invoke.ll b/test/CodeGen/Generic/2007-02-25-invoke.ll index 7850cec35f9e..4ca280d1587e 100644 --- a/test/CodeGen/Generic/2007-02-25-invoke.ll +++ b/test/CodeGen/Generic/2007-02-25-invoke.ll @@ -3,12 +3,12 @@ ; PR1224 declare i32 @test() -define i32 @test2() { +define i32 @test2() personality i32 (...)* @__gxx_personality_v0 { %A = invoke i32 @test() to label %invcont unwind label %blat invcont: ret i32 %A blat: - %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad = landingpad { i8*, i32 } cleanup ret i32 0 } diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll index 407696f4c2ff..f0259ec6b128 100644 --- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll +++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll @@ -7,7 +7,7 @@ %"struct.std::locale::facet" = type { i32 (...)**, i32 } %"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } -define void @_ZNKSt6locale4nameEv(%"struct.std::string"* %agg.result) { +define void @_ZNKSt6locale4nameEv(%"struct.std::string"* %agg.result) personality i32 (...)* @__gxx_personality_v0 { entry: %tmp105 = icmp eq i8* null, null ; <i1> [#uses=1] br i1 %tmp105, label %cond_true, label %cond_true222 @@ -45,7 +45,7 @@ cond_next1328: ; preds = %cond_true235, %cond_true ret void cond_true1402: ; preds = %invcont282, %cond_false280, %cond_true235, %cond_true - %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad = landingpad { i8*, i32 } cleanup ret void } diff --git a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll index 03ccbdfaf0cc..fe7f463159a5 100644 --- a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll +++ b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll @@ -1,6 +1,6 @@ ; RUN: llc -no-integrated-as < %s -define fastcc void @bc__support__high_resolution_time__initialize_clock_rate() { +define fastcc void @bc__support__high_resolution_time__initialize_clock_rate() personality i32 (...)* @__gxx_personality_v0 { entry: invoke void asm "rdtsc\0A\09movl %eax, $0\0A\09movl %edx, $1", "=*imr,=*imr,~{dirflag},~{fpsr},~{flags},~{dx},~{ax}"( i32* null, i32* null ) to label %.noexc unwind label %cleanup144 @@ -9,7 +9,7 @@ entry: ret void cleanup144: ; preds = %entry - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup resume { i8*, i32 } %exn } diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll index d67559e4d859..59a7b64e0dfa 100644 --- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll +++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll @@ -5,7 +5,7 @@ %struct.__type_info_pseudo = type { i8*, i8* } @_ZTI2e1 = external constant %struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=1] -define void 
@_Z7ex_testv() { +define void @_Z7ex_testv() personality i32 (...)* @__gxx_personality_v0 { entry: invoke void @__cxa_throw( i8* null, i8* bitcast (%struct.__class_type_info_pseudo* @_ZTI2e1 to i8*), void (i8*)* null ) noreturn to label %UnifiedUnreachableBlock unwind label %lpad @@ -14,13 +14,13 @@ bb14: ; preds = %lpad unreachable lpad: ; preds = %entry - %lpad1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad1 = landingpad { i8*, i32 } catch i8* null invoke void @__cxa_end_catch( ) to label %bb14 unwind label %lpad17 lpad17: ; preds = %lpad - %lpad2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad2 = landingpad { i8*, i32 } catch i8* null unreachable diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll index b483009976c2..a0455cfe1aae 100644 --- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll +++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll @@ -19,7 +19,7 @@ declare i8* @__cxa_begin_catch(i8*) nounwind declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*) -define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"* %__in, i8* nocapture %__s) { +define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"* %__in, i8* nocapture %__s) personality i32 (...)* @__gxx_personality_v0 { entry: %0 = invoke %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"* undef) to label %invcont8 unwind label %lpad74 ; <%"struct.std::ctype<char>"*> [#uses=0] @@ -62,14 +62,14 @@ invcont38: ; preds = %invcont25, %bb1.i, lpad: ; preds = %bb.i93, %invcont24, %bb1.i, %invcont8 %__extracted.1 = phi i32 [ 0, %invcont8 ], [ %2, %bb1.i ], [ undef, %bb.i93 ], [ undef, %invcont24 ] ; <i32> [#uses=0] - %lpad1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad1 = landingpad { i8*, i32 } catch i8* null %eh_ptr = extractvalue { i8*, i32 } %lpad1, 0 %6 = call i8* @__cxa_begin_catch(i8* %eh_ptr) nounwind ; <i8*> [#uses=0] unreachable lpad74: ; preds = %entry - %lpad2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %lpad2 = landingpad { i8*, i32 } cleanup unreachable } diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll index 3727b60a1a45..59ccf6be092b 100644 --- a/test/CodeGen/Generic/donothing.ll +++ b/test/CodeGen/Generic/donothing.ll @@ -5,7 +5,7 @@ declare void @__cxa_call_unexpected(i8*) declare void @llvm.donothing() readnone ; CHECK: f1 -define void @f1() nounwind uwtable ssp { +define void @f1() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; CHECK-NOT: donothing invoke void @llvm.donothing() @@ -15,7 +15,7 @@ invoke.cont: ret void lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } filter [0 x i8*] zeroinitializer %1 = extractvalue { i8*, i32 } %0, 0 tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind diff --git a/test/CodeGen/Generic/exception-handling.ll b/test/CodeGen/Generic/exception-handling.ll index 376e1f196d65..6e2a8678e9a6 100644 --- a/test/CodeGen/Generic/exception-handling.ll +++ 
b/test/CodeGen/Generic/exception-handling.ll @@ -2,7 +2,7 @@ ; PR10733 declare void @_Znam() -define void @_ZNK14gIndexOdometer15AfterExcisionOfERi() uwtable align 2 { +define void @_ZNK14gIndexOdometer15AfterExcisionOfERi() uwtable align 2 personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 { _ZN6Gambit5ArrayIiEC2Ej.exit36: br label %"9" @@ -19,7 +19,7 @@ _ZN6Gambit5ArrayIiEC2Ej.exit36: lpad27: ; preds = %"10", %"9" %0 = phi i32 [ undef, %"9" ], [ %tmp, %"10" ] - %1 = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 + %1 = landingpad { i8*, i32 } cleanup resume { i8*, i32 } zeroinitializer } diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll index a135c625fccc..a21906cf6dc5 100644 --- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll +++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll @@ -2,7 +2,7 @@ ; XFAIL: hexagon declare { i64, double } @wild() -define void @foo(i64* %p, double* %q) nounwind { +define void @foo(i64* %p, double* %q) nounwind personality i32 (...)* @__gxx_personality_v0 { %t = invoke { i64, double } @wild() to label %normal unwind label %handler normal: @@ -13,7 +13,7 @@ normal: ret void handler: - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} catch i8* null ret void } diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll index 3be4b1cc2614..dac8607d88db 100644 --- a/test/CodeGen/Hexagon/absaddr-store.ll +++ b/test/CodeGen/Hexagon/absaddr-store.ll @@ -1,39 +1,42 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-small-data-threshold=0 < %s | FileCheck %s ; Check that we generate store instructions with absolute addressing mode.
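The new RUN line above disables small-data placement; that appears to be the point of -hexagon-small-data-threshold=0, since globals placed in small data would be addressed GP-relative rather than through the absolute ##symbol form these checks expect. A minimal sketch of the store pattern under test (hypothetical function name, reusing @a1 as declared below):
; Sketch only, assuming absolute addressing is selected for the global:
; the plain store below is expected to match a check of the form
;   CHECK: memw(##a1){{ *}}={{ *}}r{{[0-9]+}}
define void @absStoreSketch(i32 %v) nounwind {
entry:
  store i32 %v, i32* @a1, align 4
  ret void
}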
-@a = external global i32 -@b = external global i8 -@c = external global i16 +@a0 = external global i32 +@a1 = external global i32 +@b0 = external global i8 +@b1 = external global i8 +@c0 = external global i16 +@c1 = external global i16 @d = external global i64 define zeroext i8 @absStoreByte() nounwind { -; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}} +; CHECK: memb(##b1){{ *}}={{ *}}r{{[0-9]+}} entry: - %0 = load i8, i8* @b, align 1 + %0 = load i8, i8* @b0, align 1 %conv = zext i8 %0 to i32 %mul = mul nsw i32 100, %conv %conv1 = trunc i32 %mul to i8 - store i8 %conv1, i8* @b, align 1 + store i8 %conv1, i8* @b1, align 1 ret i8 %conv1 } define signext i16 @absStoreHalf() nounwind { -; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}} +; CHECK: memh(##c1){{ *}}={{ *}}r{{[0-9]+}} entry: - %0 = load i16, i16* @c, align 2 + %0 = load i16, i16* @c0, align 2 %conv = sext i16 %0 to i32 %mul = mul nsw i32 100, %conv %conv1 = trunc i32 %mul to i16 - store i16 %conv1, i16* @c, align 2 + store i16 %conv1, i16* @c1, align 2 ret i16 %conv1 } define i32 @absStoreWord() nounwind { -; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}} +; CHECK: memw(##a1){{ *}}={{ *}}r{{[0-9]+}} entry: - %0 = load i32, i32* @a, align 4 + %0 = load i32, i32* @a0, align 4 %mul = mul nsw i32 100, %0 - store i32 %mul, i32* @a, align 4 + store i32 %mul, i32* @a1, align 4 ret i32 %mul } diff --git a/test/CodeGen/Hexagon/absimm.ll b/test/CodeGen/Hexagon/absimm.ll index 07adb3fe49d5..e67af5e8fef9 100644 --- a/test/CodeGen/Hexagon/absimm.ll +++ b/test/CodeGen/Hexagon/absimm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we generate absolute addressing mode instructions ; with immediate value. diff --git a/test/CodeGen/Hexagon/addh-sext-trunc.ll b/test/CodeGen/Hexagon/addh-sext-trunc.ll new file mode 100644 index 000000000000..094932933fbc --- /dev/null +++ b/test/CodeGen/Hexagon/addh-sext-trunc.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}}.{{L|l}}, r{{[0-9]+}}.{{H|h}}) + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon-unknown-none" + +%struct.aDataType = type { i16, i16, i16, i16, i16, i16*, i16*, i16*, i8*, i16*, i16*, i16*, i8* } + +define i8* @a_get_score(%struct.aDataType* nocapture %pData, i16 signext %gmmModelIndex, i16* nocapture %pGmmScoreL16Q4) #0 { +entry: + %numSubVector = getelementptr inbounds %struct.aDataType, %struct.aDataType* %pData, i32 0, i32 3 + %0 = load i16, i16* %numSubVector, align 2, !tbaa !0 + %and = and i16 %0, -4 + %b = getelementptr inbounds %struct.aDataType, %struct.aDataType* %pData, i32 0, i32 8 + %1 = load i8*, i8** %b, align 4, !tbaa !3 + %conv3 = sext i16 %and to i32 + %cmp21 = icmp sgt i16 %and, 0 + br i1 %cmp21, label %for.inc.preheader, label %for.end + +for.inc.preheader: ; preds = %entry + br label %for.inc + +for.inc: ; preds = %for.inc.preheader, %for.inc + %j.022 = phi i32 [ %phitmp, %for.inc ], [ 0, %for.inc.preheader ] + %add13 = mul i32 %j.022, 65536 + %sext = add i32 %add13, 262144 + %phitmp = ashr exact i32 %sext, 16 + %cmp = icmp slt i32 %phitmp, %conv3 + br i1 %cmp, label %for.inc, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret i8* %1 +} + +attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = !{!"short", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} +!3 = !{!"any pointer", !1} diff --git a/test/CodeGen/Hexagon/addh-shifted.ll b/test/CodeGen/Hexagon/addh-shifted.ll new file mode 100644 index 000000000000..eb263521b42f --- /dev/null +++ b/test/CodeGen/Hexagon/addh-shifted.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}}.{{L|l}}, r{{[0-9]+}}.{{L|l}}):<<16 + +define i64 @test_cast(i64 %arg0, i16 zeroext %arg1, i16 zeroext %arg2) nounwind readnone { +entry: + %conv.i = zext i16 %arg1 to i32 + %conv1.i = zext i16 %arg2 to i32 + %sub.i = add nsw i32 %conv.i, %conv1.i + %sext.i = shl i32 %sub.i, 16 + %cmp.i = icmp slt i32 %sext.i, 65536 + %0 = ashr exact i32 %sext.i, 16 + %conv7.i = select i1 %cmp.i, i32 1, i32 %0 + %cmp8.i = icmp sgt i32 %conv7.i, 4 + %conv7.op.i = add i32 %conv7.i, 65535 + %shl = shl i64 %arg0, 2 + %.mask = and i32 %conv7.op.i, 65535 + %1 = zext i32 %.mask to i64 + %conv = select i1 %cmp8.i, i64 3, i64 %1 + %or = or i64 %conv, %shl + ret i64 %or +} diff --git a/test/CodeGen/Hexagon/addh.ll b/test/CodeGen/Hexagon/addh.ll new file mode 100644 index 000000000000..c2b536c4669a --- /dev/null +++ b/test/CodeGen/Hexagon/addh.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}}.{{L|l}}, r{{[0-9]+}}.{{L|l}}) + +define i64 @test_cast(i64 %arg0, i16 zeroext %arg1, i16 zeroext %arg2) nounwind readnone { +entry: + %conv.i = zext i16 %arg1 to i32 + %conv1.i = zext i16 %arg2 to i32 + %sub.i = add nsw i32 %conv.i, %conv1.i + %sext.i = shl i32 %sub.i, 16 + %cmp.i = icmp slt i32 %sext.i, 65536 + %0 = ashr exact i32 %sext.i, 16 + %conv7.i = select i1 %cmp.i, i32 1, i32 %0 + %cmp8.i = icmp sgt i32 %conv7.i, 4 + %conv7.op.i = add i32 %conv7.i, 65535 + %shl = shl i64 %arg0, 2 + %.mask = and i32 %conv7.op.i, 65535 + %1 = zext i32 %.mask to i64 + %conv = select i1 %cmp8.i, i64 3, i64 %1 + %or = or i64 %conv, %shl + ret i64 %or +} diff --git a/test/CodeGen/Hexagon/addrmode-indoff.ll b/test/CodeGen/Hexagon/addrmode-indoff.ll new file mode 100644 index 000000000000..6ea2b3d95daf --- /dev/null +++ b/test/CodeGen/Hexagon/addrmode-indoff.ll @@ -0,0 +1,74 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; Bug 6840. Use absolute+index addressing. 
+ +@ga = common global [1024 x i8] zeroinitializer, align 8 +@gb = common global [1024 x i8] zeroinitializer, align 8 + +; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) +define zeroext i8 @lf2(i32 %i) nounwind readonly { +entry: + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + %0 = load i8, i8* %arrayidx, align 1 + ret i8 %0 +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) +define signext i8 @lf2s(i32 %i) nounwind readonly { +entry: + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i + %0 = load i8, i8* %arrayidx, align 1 + ret i8 %0 +} + +; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga) +define zeroext i8 @lf3(i32 %i) nounwind readonly { +entry: + %mul = shl nsw i32 %i, 2 + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul + %0 = load i8, i8* %arrayidx, align 1 + ret i8 %0 +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) +define signext i8 @lf3s(i32 %i) nounwind readonly { +entry: + %mul = shl nsw i32 %i, 2 + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul + %0 = load i8, i8* %arrayidx, align 1 + ret i8 %0 +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) +define void @sf4(i32 %i, i8 zeroext %j) nounwind { +entry: + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + store i8 %j, i8* %arrayidx, align 1 + ret void +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) +define void @sf4s(i32 %i, i8 signext %j) nounwind { +entry: + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i + store i8 %j, i8* %arrayidx, align 1 + ret void +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga) +define void @sf5(i32 %i, i8 zeroext %j) nounwind { +entry: + %mul = shl nsw i32 %i, 2 + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul + store i8 %j, i8* %arrayidx, align 1 + ret void +} + +; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) +define void @sf5s(i32 %i, i8 signext %j) nounwind { +entry: + %mul = shl nsw i32 %i, 2 + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul + store i8 %j, i8* %arrayidx, align 1 + ret void +} diff --git a/test/CodeGen/Hexagon/always-ext.ll b/test/CodeGen/Hexagon/always-ext.ll index 8b4b2f5bf4f2..3bf465b6a513 100644 --- a/test/CodeGen/Hexagon/always-ext.ll +++ b/test/CodeGen/Hexagon/always-ext.ll @@ -1,5 +1,4 @@ -; XFAIL: -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we don't generate an invalid packet with too many instructions ; due to a store that has a must-extend operand. 
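For the always-ext.ll test that follows: a Hexagon packet holds at most four instruction words, and a constant extender occupies a word of its own, so a store with a must-extend operand leaves fewer slots for companion instructions; the test checks that the packetizer accounts for this. A rough sketch of such a store (hypothetical function; the absolute address is assumed to fall outside the base store encoding and so require an extender):
; Sketch only: a store through an absolute address that needs a ##
; constant extender, which takes its own slot in the packet.
define void @mustExtendStore(i32 %v) nounwind {
entry:
  %p = inttoptr i32 536870912 to i32*
  store i32 %v, i32* %p, align 4
  ret void
}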
@@ -8,7 +7,7 @@ ; CHECK: { ; CHECK-NOT: call abort ; CHECK: memw(##0) -; CHECK: memw(r{{[0-9+]}}<<#2 + ##4) +; CHECK: memw(r{{[0-9+]}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##4) ; CHECK: } %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111 = type { i8*, void (%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*)*, i32, i32, i8*, [23 x i32]* } diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll index 1c470f68aa27..3bfb8b159556 100644 --- a/test/CodeGen/Hexagon/args.ll +++ b/test/CodeGen/Hexagon/args.ll @@ -2,7 +2,7 @@ ; CHECK: r5:4 = combine(#6, #5) ; CHECK: r3:2 = combine(#4, #3) ; CHECK: r1:0 = combine(#2, #1) -; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7 +; CHECK: memw(r29+#0)=#7 define void @foo() nounwind { diff --git a/test/CodeGen/Hexagon/ashift-left-right.ll b/test/CodeGen/Hexagon/ashift-left-right.ll index 7c41bc7bbf3b..bc3e813220db 100644 --- a/test/CodeGen/Hexagon/ashift-left-right.ll +++ b/test/CodeGen/Hexagon/ashift-left-right.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s define i32 @foo(i32 %a, i32 %b) nounwind readnone { ; CHECK: lsl diff --git a/test/CodeGen/Hexagon/barrier-flag.ll b/test/CodeGen/Hexagon/barrier-flag.ll new file mode 100644 index 000000000000..e70a56bae02d --- /dev/null +++ b/test/CodeGen/Hexagon/barrier-flag.ll @@ -0,0 +1,125 @@ +; RUN: llc -O2 < %s +; Check for successful compilation. It originally caused an abort due to +; the "isBarrier" flag set on instructions that were not meant to have it. + +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" +target triple = "hexagon" + +; Function Attrs: nounwind optsize readnone +define void @dummy() #0 { +entry: + ret void +} + +; Function Attrs: nounwind optsize +define void @conv3x3(i8* nocapture readonly %inp, i8* nocapture readonly %mask, i32 %shift, i8* nocapture %outp, i32 %width) #1 { +entry: + %cmp381 = icmp sgt i32 %width, 0 + %arrayidx16.gep = getelementptr i8, i8* %mask, i32 4 + %arrayidx19.gep = getelementptr i8, i8* %mask, i32 8 + br label %for.body + +for.body: ; preds = %for.inc48, %entry + %i.086 = phi i32 [ 0, %entry ], [ %inc49, %for.inc48 ] + %mul = mul nsw i32 %i.086, %width + %arrayidx.sum = add i32 %mul, %width + br i1 %cmp381, label %for.cond5.preheader.lr.ph, label %for.inc48 + +for.cond5.preheader.lr.ph: ; preds = %for.body + %add.ptr.sum = add i32 %arrayidx.sum, %width + %add.ptr1 = getelementptr inbounds i8, i8* %inp, i32 %add.ptr.sum + %add.ptr = getelementptr inbounds i8, i8* %inp, i32 %arrayidx.sum + %arrayidx = getelementptr inbounds i8, i8* %inp, i32 %mul + %arrayidx44.gep = getelementptr i8, i8* %outp, i32 %mul + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %if.end40, %for.cond5.preheader.lr.ph + %arrayidx44.phi = phi i8* [ %arrayidx44.gep, %for.cond5.preheader.lr.ph ], [ %arrayidx44.inc, %if.end40 ] + %j.085 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc46, %if.end40 ] + %IN1.084 = phi i8* [ %arrayidx, %for.cond5.preheader.lr.ph ], [ %incdec.ptr, %if.end40 ] + %IN2.083 = phi i8* [ %add.ptr, %for.cond5.preheader.lr.ph ], [ %incdec.ptr33, %if.end40 ] + %IN3.082 = phi i8* [ %add.ptr1, %for.cond5.preheader.lr.ph ], [ %incdec.ptr34, %if.end40 ] + br label %for.body7 + +for.body7: ; preds = %for.body7, %for.cond5.preheader + %arrayidx8.phi = phi i8* [ %IN1.084, %for.cond5.preheader ], [ %arrayidx8.inc, %for.body7 ] + %arrayidx9.phi = phi i8* [ %IN2.083, %for.cond5.preheader ], [ %arrayidx9.inc, %for.body7 ] + %arrayidx11.phi = phi i8* [ %IN3.082, 
%for.cond5.preheader ], [ %arrayidx11.inc, %for.body7 ] + %arrayidx13.phi = phi i8* [ %mask, %for.cond5.preheader ], [ %arrayidx13.inc, %for.body7 ] + %arrayidx16.phi = phi i8* [ %arrayidx16.gep, %for.cond5.preheader ], [ %arrayidx16.inc, %for.body7 ] + %arrayidx19.phi = phi i8* [ %arrayidx19.gep, %for.cond5.preheader ], [ %arrayidx19.inc, %for.body7 ] + %k.080 = phi i32 [ 0, %for.cond5.preheader ], [ %inc, %for.body7 ] + %sum.079 = phi i32 [ 0, %for.cond5.preheader ], [ %add32, %for.body7 ] + %0 = load i8, i8* %arrayidx8.phi, align 1, !tbaa !1 + %1 = load i8, i8* %arrayidx9.phi, align 1, !tbaa !1 + %2 = load i8, i8* %arrayidx11.phi, align 1, !tbaa !1 + %3 = load i8, i8* %arrayidx13.phi, align 1, !tbaa !1 + %4 = load i8, i8* %arrayidx16.phi, align 1, !tbaa !1 + %5 = load i8, i8* %arrayidx19.phi, align 1, !tbaa !1 + %conv21 = zext i8 %0 to i32 + %conv22 = sext i8 %3 to i32 + %mul23 = mul nsw i32 %conv22, %conv21 + %conv24 = zext i8 %1 to i32 + %conv25 = sext i8 %4 to i32 + %mul26 = mul nsw i32 %conv25, %conv24 + %conv27 = zext i8 %2 to i32 + %conv28 = sext i8 %5 to i32 + %mul29 = mul nsw i32 %conv28, %conv27 + %add30 = add i32 %mul23, %sum.079 + %add31 = add i32 %add30, %mul26 + %add32 = add i32 %add31, %mul29 + %inc = add nsw i32 %k.080, 1 + %exitcond = icmp eq i32 %inc, 3 + %arrayidx8.inc = getelementptr i8, i8* %arrayidx8.phi, i32 1 + %arrayidx9.inc = getelementptr i8, i8* %arrayidx9.phi, i32 1 + %arrayidx11.inc = getelementptr i8, i8* %arrayidx11.phi, i32 1 + %arrayidx13.inc = getelementptr i8, i8* %arrayidx13.phi, i32 1 + %arrayidx16.inc = getelementptr i8, i8* %arrayidx16.phi, i32 1 + %arrayidx19.inc = getelementptr i8, i8* %arrayidx19.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body7 + +for.end: ; preds = %for.body7 + %incdec.ptr = getelementptr inbounds i8, i8* %IN1.084, i32 1 + %incdec.ptr33 = getelementptr inbounds i8, i8* %IN2.083, i32 1 + %incdec.ptr34 = getelementptr inbounds i8, i8* %IN3.082, i32 1 + %shr = ashr i32 %add32, %shift + %cmp35 = icmp slt i32 %shr, 0 + br i1 %cmp35, label %if.end40, label %if.end + +if.end: ; preds = %for.end + %cmp37 = icmp sgt i32 %shr, 255 + br i1 %cmp37, label %if.then39, label %if.end40 + +if.then39: ; preds = %if.end + br label %if.end40 + +if.end40: ; preds = %for.end, %if.then39, %if.end + %sum.2 = phi i32 [ 255, %if.then39 ], [ %shr, %if.end ], [ 0, %for.end ] + %conv41 = trunc i32 %sum.2 to i8 + store i8 %conv41, i8* %arrayidx44.phi, align 1, !tbaa !1 + %inc46 = add nsw i32 %j.085, 1 + %exitcond87 = icmp eq i32 %inc46, %width + %arrayidx44.inc = getelementptr i8, i8* %arrayidx44.phi, i32 1 + br i1 %exitcond87, label %for.inc48.loopexit, label %for.cond5.preheader + +for.inc48.loopexit: ; preds = %if.end40 + br label %for.inc48 + +for.inc48: ; preds = %for.inc48.loopexit, %for.body + %inc49 = add nsw i32 %i.086, 1 + %exitcond88 = icmp eq i32 %inc49, 2 + br i1 %exitcond88, label %for.end50, label %for.body + +for.end50: ; preds = %for.inc48 + ret void +} + +attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = 
!{!"Clang 3.1"} +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/base-offset-addr.ll b/test/CodeGen/Hexagon/base-offset-addr.ll new file mode 100644 index 000000000000..30410fe92543 --- /dev/null +++ b/test/CodeGen/Hexagon/base-offset-addr.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=hexagon -enable-aa-sched-mi < %s +; REQUIRES: asserts + +; Make sure the base is a register and not an address. + +define fastcc void @Get_lsp_pol(i32* nocapture %f) #0 { +entry: + %f5 = alloca i32, align 4 + %arrayidx103 = getelementptr inbounds i32, i32* %f, i32 4 + store i32 0, i32* %arrayidx103, align 4 + %f5.0.load185 = load volatile i32, i32* %f5, align 4 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/base-offset-post.ll b/test/CodeGen/Hexagon/base-offset-post.ll new file mode 100644 index 000000000000..a6e4cdd34a0d --- /dev/null +++ b/test/CodeGen/Hexagon/base-offset-post.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s +; REQUIRES: asserts + +; Test that the accessSize is set on a post-increment store. If not, an assert +; is triggered in getBaseAndOffset() + +%struct.A = type { i8, i32, i32, i32, [10 x i32], [10 x i32], [80 x i32], [80 x i32], [8 x i32], i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } + +; Function Attrs: nounwind +define fastcc void @Decoder_amr(i8 zeroext %mode) #0 { +entry: + br label %for.cond64.preheader.i + +for.cond64.preheader.i: + %i.1984.i = phi i32 [ 0, %entry ], [ %inc166.i.1, %for.cond64.preheader.i ] + %inc166.i = add nsw i32 %i.1984.i, 1 + %arrayidx71.i1422.1 = getelementptr inbounds %struct.A, %struct.A* undef, i32 0, i32 7, i32 %inc166.i + %storemerge800.i.1 = select i1 undef, i32 1310, i32 undef + %sub156.i.1 = sub nsw i32 0, %storemerge800.i.1 + %sub156.storemerge800.i.1 = select i1 undef, i32 %storemerge800.i.1, i32 %sub156.i.1 + store i32 %sub156.storemerge800.i.1, i32* %arrayidx71.i1422.1, align 4 + store i32 0, i32* undef, align 4 + %inc166.i.1 = add nsw i32 %i.1984.i, 2 + br label %for.cond64.preheader.i + +if.end: + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/bugAsmHWloop.ll b/test/CodeGen/Hexagon/bugAsmHWloop.ll new file mode 100644 index 000000000000..c7e95ed05664 --- /dev/null +++ b/test/CodeGen/Hexagon/bugAsmHWloop.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: { +; CHECK: loop0(.LBB +; CHECK-NOT: loop0(##.LBB + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define i32 @q6zip_uncompress(i8* %out_buf, i32* %out_buf_size, i8* %in_buf, i32 %in_buf_size, i8* nocapture %dict, i32 %dict_size) nounwind { +entry: + %0 = bitcast i8* %in_buf to i32* + %incdec.ptr = getelementptr inbounds i8, i8* %in_buf, i32 4 + %1 = load i32, i32* %0, align 4, !tbaa !0 + %2 = ptrtoint i8* %incdec.ptr to i32 + %and.i = and i32 %2, 31 + %sub.i = sub i32 %2, %and.i + %3 = inttoptr i32 %sub.i to i8* + %add.i = add i32 %in_buf_size, 31 + %sub2.i = add i32 %add.i, %and.i + %div.i = 
lshr i32 %sub2.i, 5 + %4 = tail call i32 @llvm.hexagon.A2.combine.ll(i32 32, i32 %div.i) nounwind + %5 = tail call i64 @llvm.hexagon.A4.combineir(i32 32, i32 %4) nounwind + tail call void asm sideeffect "l2fetch($0,$1)", "r,r,~{memory}"(i8* %3, i64 %5) nounwind, !srcloc !3 + %6 = ptrtoint i8* %out_buf to i32 + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %entry + %i.02.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] + %addr.addr.01.i = phi i32 [ %6, %entry ], [ %add.i14, %for.body.i ] + tail call void asm sideeffect "dczeroa($0)", "r"(i32 %addr.addr.01.i) nounwind, !srcloc !4 + %add.i14 = add i32 %addr.addr.01.i, 32 + %inc.i = add i32 %i.02.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 128 + br i1 %exitcond.i, label %while.cond.preheader, label %for.body.i + +while.cond.preheader: ; preds = %for.body.i + %and = and i32 %1, 3 + switch i32 %and, label %infloop.preheader [ + i32 0, label %exit_inflate.split + i32 2, label %if.then.preheader + ] + +if.then.preheader: ; preds = %while.cond.preheader + br label %if.then + +infloop.preheader: ; preds = %while.cond.preheader + br label %infloop + +if.then: ; preds = %if.then.preheader, %if.then + tail call void @llvm.prefetch(i8* %incdec.ptr, i32 0, i32 3, i32 1) + br label %if.then + +exit_inflate.split: ; preds = %while.cond.preheader + ret i32 0 + +infloop: ; preds = %infloop.preheader, %infloop + br label %infloop +} + +declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind + +declare i64 @llvm.hexagon.A4.combineir(i32, i32) nounwind readnone + +declare i32 @llvm.hexagon.A2.combine.ll(i32, i32) nounwind readnone + +!0 = !{!"long", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} +!3 = !{i32 18362} +!4 = !{i32 18893} diff --git a/test/CodeGen/Hexagon/cext-valid-packet1.ll b/test/CodeGen/Hexagon/cext-valid-packet1.ll index 35e7b364b508..36abc59f5e3e 100644 --- a/test/CodeGen/Hexagon/cext-valid-packet1.ll +++ b/test/CodeGen/Hexagon/cext-valid-packet1.ll @@ -1,5 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s -; XFAIL: +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that the packetizer generates valid packets with constant ; extended instructions. diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll index c3a4915ec2e0..9f03ef1309ec 100644 --- a/test/CodeGen/Hexagon/cext-valid-packet2.ll +++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll @@ -1,44 +1,16 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s -; XFAIL: +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that the packetizer generates valid packets with constant ; extended add and base+offset store instructions. 
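For context: a ##-extended immediate is encoded as an extra constant-extender word placed immediately before the instruction it extends, inside the same packet, so the packetizer has to fit the extended add, its extender, and the dependent .new store into a single valid packet. A minimal sketch of the packet shape the updated checks below expect (register numbers illustrative):

    {
      r2 = add(r1, ##200000)
      memw(r0+##12000) = r2.new
    }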
-; CHECK: { -; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}}) -; CHECK-NEXT: memw(r{{[0-9]+}}+{{ *}}##{{[0-9]+}}){{ *}}={{ *}}r{{[0-9]+}}.new +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}},{{ *}}##200000) +; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}##12000){{ *}}={{ *}}r{{[0-9]+}}.new ; CHECK-NEXT: } -define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind { +define void @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind { entry: - %add = add nsw i32 %c, 200002 %0 = load i32, i32* %a, align 4 %add1 = add nsw i32 %0, 200000 %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 3000 store i32 %add1, i32* %arrayidx2, align 4 - %1 = load i32, i32* %b, align 4 - %add4 = add nsw i32 %1, 200001 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i32 1 - store i32 %add4, i32* %arrayidx5, align 4 - %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 1 - %2 = load i32, i32* %arrayidx7, align 4 - %cmp = icmp sgt i32 %add4, %2 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %arrayidx8 = getelementptr inbounds i32, i32* %a, i32 2 - %3 = load i32, i32* %arrayidx8, align 4 - %arrayidx9 = getelementptr inbounds i32, i32* %b, i32 2000 - %4 = load i32, i32* %arrayidx9, align 4 - %sub = sub nsw i32 %3, %4 - %arrayidx10 = getelementptr inbounds i32, i32* %a, i32 4000 - store i32 %sub, i32* %arrayidx10, align 4 - br label %if.end - -if.else: ; preds = %entry - %arrayidx11 = getelementptr inbounds i32, i32* %b, i32 3200 - store i32 %add, i32* %arrayidx11, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret i32 %add + ret void } diff --git a/test/CodeGen/Hexagon/cext.ll b/test/CodeGen/Hexagon/cext.ll new file mode 100644 index 000000000000..6daba8cc9599 --- /dev/null +++ b/test/CodeGen/Hexagon/cext.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#1{{ *}}+{{ *}}##a) + +@a = external global [5 x [2 x i8]] + +define zeroext i8 @foo(i8 zeroext %l) nounwind readonly { +for.end: + %idxprom = zext i8 %l to i32 + %arrayidx1 = getelementptr inbounds [5 x [2 x i8]], [5 x [2 x i8]]* @a, i32 0, i32 %idxprom, i32 0 + %0 = load i8, i8* %arrayidx1, align 1 + %conv = zext i8 %0 to i32 + %mul = mul nsw i32 %conv, 20 + %conv2 = trunc i32 %mul to i8 + ret i8 %conv2 +} + diff --git a/test/CodeGen/Hexagon/cexti16.ll b/test/CodeGen/Hexagon/cexti16.ll new file mode 100644 index 000000000000..465cfe400719 --- /dev/null +++ b/test/CodeGen/Hexagon/cexti16.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: memuh(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##a) + +@a = external global [5 x [2 x i16]] + +define signext i16 @foo(i16 zeroext %l) nounwind readonly { +for.end: + %idxprom = zext i16 %l to i32 + %arrayidx1 = getelementptr inbounds [5 x [2 x i16]], [5 x [2 x i16]]* @a, i32 0, i32 %idxprom, i32 0 + %0 = load i16, i16* %arrayidx1, align 2 + %conv = zext i16 %0 to i32 + %mul = mul nsw i32 %conv, 20 + %conv2 = trunc i32 %mul to i16 + ret i16 %conv2 +} + diff --git a/test/CodeGen/Hexagon/checktabs.ll b/test/CodeGen/Hexagon/checktabs.ll new file mode 100644 index 000000000000..740433bf824a --- /dev/null +++ b/test/CodeGen/Hexagon/checktabs.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=hexagon < %s | FileCheck --strict-whitespace %s +; Make sure we are emitting tabs as formatting. 
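The mechanism under test is FileCheck's --strict-whitespace: by default FileCheck canonicalizes horizontal whitespace before matching, so tabs and spaces are interchangeable, while the flag makes whitespace match exactly and so makes tab-vs-space indentation in the emitted assembly observable. A sketch of the expected output shape (register numbers illustrative; each instruction line tab-indented):

    {
    	r0 = add(r0, r1)
    	jumpr r31
    }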
+; CHECK: { +; CHECK-NEXT: {{jump|r}} +define i32 @foobar(i32 %a, i32 %b) { + %1 = add i32 %a, %b + ret i32 %1 +} diff --git a/test/CodeGen/Hexagon/cmp-extend.ll b/test/CodeGen/Hexagon/cmp-extend.ll new file mode 100644 index 000000000000..0bd1fca73946 --- /dev/null +++ b/test/CodeGen/Hexagon/cmp-extend.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s + +%struct.RESULTS_S.A = type { i16, i16, i16, [4 x i8*], i32, i32, i32, %struct.list_head_s.B*, %struct.MAT_PARAMS_S.D, i16, i16, i16, i16, i16, %struct.CORE_PORTABLE_S.E } +%struct.list_head_s.B = type { %struct.list_head_s.B*, %struct.list_data_s.C* } +%struct.list_data_s.C = type { i16, i16 } +%struct.MAT_PARAMS_S.D = type { i32, i16*, i16*, i32* } +%struct.CORE_PORTABLE_S.E = type { i8 } + +; Test that we don't generate a zero extend in this case. Instead we generate +; a single sign extend instead of two zero extends. + +; CHECK-NOT: zxth + +; Function Attrs: nounwind +define void @core_bench_list(%struct.RESULTS_S.A* %res) #0 { +entry: + %seed3 = getelementptr inbounds %struct.RESULTS_S.A, %struct.RESULTS_S.A* %res, i32 0, i32 2 + %0 = load i16, i16* %seed3, align 2 + %cmp364 = icmp sgt i16 %0, 0 + br i1 %cmp364, label %for.body, label %while.body19.i160 + +for.body: + %i.0370 = phi i16 [ %inc50, %if.then ], [ 0, %entry ] + br i1 undef, label %if.then, label %while.body.i273 + +while.body.i273: + %tobool.i272 = icmp eq %struct.list_head_s.B* undef, null + br i1 %tobool.i272, label %if.then, label %while.body.i273 + +if.then: + %inc50 = add i16 %i.0370, 1 + %exitcond = icmp eq i16 %inc50, %0 + br i1 %exitcond, label %while.body19.i160, label %for.body + +while.body19.i160: + br label %while.body19.i160 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/CodeGen/Hexagon/cmp-promote.ll b/test/CodeGen/Hexagon/cmp-promote.ll new file mode 100644 index 000000000000..7811b7e729cb --- /dev/null +++ b/test/CodeGen/Hexagon/cmp-promote.ll @@ -0,0 +1,72 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; Bug 6714. Use sign-extend to promote the arguments for compare +; equal/not-equal for 8- and 16-bit types with negative constants. 
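A worked example of why sign-extension is the right promotion: -16 as an i16 is 0xFFF0, and the two possible promotions to i32 give

    zext: 0x0000FFF0 = 65520   (value changed)
    sext: 0xFFFFFFF0 = -16     (value preserved)

so only the sign-extended form lets the comparison fold to an immediate compare such as cmp.eq(rN, #-16), which is the pattern the CHECK lines below look for.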
+ +; CHECK: cmp.eq{{.*}}#-16 +define i32 @foo1(i16 signext %q) nounwind readnone { +entry: + %not.cmp = icmp ne i16 %q, -16 + %res.0 = zext i1 %not.cmp to i32 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#-14 +define i32 @foo2(i16 signext %q) nounwind readnone { +entry: + %cmp = icmp eq i16 %q, -14 + %res.0 = select i1 %cmp, i32 2, i32 0 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#-8 +define i32 @foo3(i8 signext %r) nounwind readnone { +entry: + %cmp = icmp eq i8 %r, -8 + %res.0 = select i1 %cmp, i32 0, i32 3 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#-6 +define i32 @foo4(i8 signext %r) nounwind readnone { +entry: + %cmp = icmp eq i8 %r, -6 + %res.0 = select i1 %cmp, i32 4, i32 0 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#-20 +define i32 @foo5(i32 %s) nounwind readnone { +entry: + %cmp = icmp eq i32 %s, -20 + %res.0 = select i1 %cmp, i32 0, i32 5 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#-18 +define i32 @foo6(i32 %s) nounwind readnone { +entry: + %cmp = icmp eq i32 %s, -18 + %res.0 = select i1 %cmp, i32 6, i32 0 + ret i32 %res.0 +} + +; CHECK: cmp.eq{{.*}}#10 +define i32 @foo7(i16 signext %q) nounwind readnone { +entry: + %cmp = icmp eq i16 %q, 10 + %res.0 = select i1 %cmp, i32 7, i32 0 + ret i32 %res.0 +} + +@g = external global i16 + +; CHECK: cmp.eq{{.*}}#-12 +define i32 @foo8() nounwind readonly { +entry: + %0 = load i16, i16* @g, align 2 + %cmp = icmp eq i16 %0, -12 + %res.0 = select i1 %cmp, i32 0, i32 8 + ret i32 %res.0 +} + diff --git a/test/CodeGen/Hexagon/cmp-to-genreg.ll b/test/CodeGen/Hexagon/cmp-to-genreg.ll index 97cf51ce1a2b..d0df16815131 100644 --- a/test/CodeGen/Hexagon/cmp-to-genreg.ll +++ b/test/CodeGen/Hexagon/cmp-to-genreg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we generate compare to general register. define i32 @compare1(i32 %a) nounwind { diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll index 2b65343ab2cf..c97a736f10af 100644 --- a/test/CodeGen/Hexagon/cmp-to-predreg.ll +++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we generate compare to predicate register. 
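As a rough sketch (assumed output shape, in line with the cmp.ll checks later in this patch), "compare to predicate register" means the icmp result lands in a p register and feeds predicated instructions directly, rather than being materialized in a general register first:

    p0 = cmp.eq(r0, r1)
    if (p0) r0 = #1
    if (!p0) r0 = #0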
define i32 @compare1(i32 %a, i32 %b) nounwind { diff --git a/test/CodeGen/Hexagon/cmp.ll b/test/CodeGen/Hexagon/cmp.ll new file mode 100644 index 000000000000..c274a787249a --- /dev/null +++ b/test/CodeGen/Hexagon/cmp.ll @@ -0,0 +1,161 @@ +; RUN: llc -march=hexagon --filetype=obj < %s -o - | llvm-objdump -d - | FileCheck %s + +; Function Attrs: nounwind +define i32 @cmpeq(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpeq(i32 %0, i32 1) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.eq(r{{[0-9]}}, r{{[0-9]}}) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpeq(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgt(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgt(i32 %0, i32 2) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gt(r{{[0-9]}}, r{{[0-9]}}) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgt(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgtu(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgtu(i32 %0, i32 3) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gtu(r{{[0-9]}}, r{{[0-9]}}) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgtu(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmplt(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmplt(i32 %0, i32 4) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gt(r{{[0-9]}}, r{{[0-9]}}) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmplt(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpltu(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpltu(i32 %0, i32 5) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gtu(r{{[0-9]}}, r{{[0-9]}}) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpltu(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpeqi(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpeqi(i32 %0, i32 10) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.eq(r{{[0-9]}}, {{.*}}#10) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpeqi(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgti(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgti(i32 %0, i32 20) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gt(r{{[0-9]}}, {{.*}}#20) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgti(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgtui(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgtui(i32 %0, i32 40) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gtu(r{{[0-9]}}, {{.*}}#40) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgtui(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgei(i32 %i) #0 { 
+entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgei(i32 %0, i32 3) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gt(r{{[0-9]}}, {{.*}}#2) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgei(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgeu(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgeui(i32 %0, i32 3) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.gtu(r{{[0-9]}}, {{.*}}#2) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.C2.cmpgeui(i32, i32) #1 + +; Function Attrs: nounwind +define i32 @cmpgeu0(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = call i32 @llvm.hexagon.C2.cmpgeui(i32 %0, i32 0) + ret i32 %1 +} +; CHECK: { p{{[0-3]}} = cmp.eq(r{{[0-9]}}, r{{[0-9]}}) + + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"} + diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll index 39549a1f2d54..ee3f5ddf1f12 100644 --- a/test/CodeGen/Hexagon/cmp_pred.ll +++ b/test/CodeGen/Hexagon/cmp_pred.ll @@ -1,4 +1,3 @@ -; XFAIL: ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. if (!p0)... target triple = "hexagon" @@ -61,7 +60,7 @@ entry: define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { entry: -; CHECK: mux +; CHECK-NOT: mux %cmp = icmp sgt i32 %Enum_Par_Val, %pv2 %selv = zext i1 %cmp to i32 ret i32 %selv diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll index 39549a1f2d54..ee3f5ddf1f12 100644 --- a/test/CodeGen/Hexagon/cmp_pred_reg.ll +++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll @@ -1,4 +1,3 @@ -; XFAIL: ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. if (!p0)... 
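A sketch of the two shapes this test distinguishes (an assumption, inferred from the comment and the mux checks that follow): the undesired lowering selects through a mux, while the desired one compares once into a predicate and uses predicated transfers:

    p0 = cmpb.gt(r0, r1)      // byte compare straight into a predicate
    if (p0) r0 = #1           // then if (p0) .. if (!p0) transfers,
    if (!p0) r0 = #0          // no mux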
target triple = "hexagon" @@ -61,7 +60,7 @@ entry: define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { entry: -; CHECK: mux +; CHECK-NOT: mux %cmp = icmp sgt i32 %Enum_Par_Val, %pv2 %selv = zext i1 %cmp to i32 ret i32 %selv diff --git a/test/CodeGen/Hexagon/cmpb-eq.ll b/test/CodeGen/Hexagon/cmpb-eq.ll new file mode 100644 index 000000000000..e59ed3e51c37 --- /dev/null +++ b/test/CodeGen/Hexagon/cmpb-eq.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK-NOT: cmpb.eq(r{{[0-9]+}}, #-1) + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +%struct.wms_address_s = type { i32, i32, i32, i32, i8, [48 x i8] } + +define zeroext i8 @qmi_wmsi_bin_to_addr(i8* %str, i8 zeroext %len, %struct.wms_address_s* %addr) nounwind optsize { +entry: + %cmp = icmp eq i8* %str, null + %cmp2 = icmp eq %struct.wms_address_s* %addr, null + %or.cond = or i1 %cmp, %cmp2 + br i1 %or.cond, label %if.then12, label %if.then + +if.then: ; preds = %entry + %dec = add i8 %len, -1 + %cmp3 = icmp ugt i8 %dec, 24 + %tobool27 = icmp eq i8 %dec, 0 + %or.cond31 = or i1 %cmp3, %tobool27 + br i1 %or.cond31, label %if.then12, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.then + %dec626 = add i8 %len, -2 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %if.end21 + %indvars.iv = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end21 ] + %dec630 = phi i8 [ %dec626, %for.body.lr.ph ], [ %dec6, %if.end21 ] + %str.pn = phi i8* [ %str, %for.body.lr.ph ], [ %str.addr.029, %if.end21 ] + %str.addr.029 = getelementptr inbounds i8, i8* %str.pn, i32 1 + %0 = load i8, i8* %str.addr.029, align 1, !tbaa !0 + %cmp10 = icmp ugt i8 %0, -49 + br i1 %cmp10, label %if.then12.loopexit, label %if.end21 + +if.then12.loopexit: ; preds = %if.end21, %for.body + br label %if.then12 + +if.then12: ; preds = %if.then12.loopexit, %if.then, %entry + ret i8 0 + +if.end21: ; preds = %for.body + %shr24 = lshr i8 %0, 4 + %arrayidx = getelementptr inbounds %struct.wms_address_s, %struct.wms_address_s* %addr, i32 0, i32 5, i32 %indvars.iv + store i8 %shr24, i8* %arrayidx, align 1, !tbaa !0 + %dec6 = add i8 %dec630, -1 + %tobool = icmp eq i8 %dec630, 0 + %indvars.iv.next = add i32 %indvars.iv, 1 + br i1 %tobool, label %if.then12.loopexit, label %for.body +} + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll index 1a43e6291696..d5a76ff129e3 100644 --- a/test/CodeGen/Hexagon/cmpb_pred.ll +++ b/test/CodeGen/Hexagon/cmpb_pred.ll @@ -1,4 +1,3 @@ -; XFAIL: ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. if (!p0)... target triple = "hexagon" @@ -64,7 +63,7 @@ entry: define i32 @Func_3g(i32) nounwind readnone { entry: -; CHECK: mux +; CHECK-NOT: mux %conv = and i32 %0, 255 %cmp = icmp ult i32 %conv, 3 %selv = zext i1 %cmp to i32 diff --git a/test/CodeGen/Hexagon/eh_return.ll b/test/CodeGen/Hexagon/eh_return.ll new file mode 100644 index 000000000000..67649a07afc7 --- /dev/null +++ b/test/CodeGen/Hexagon/eh_return.ll @@ -0,0 +1,48 @@ +; RUN: llc -O0 -march=hexagon < %s | FileCheck %s +; Make sure we generate an exception handling return. 
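For context, llvm.eh.return.i32(%offset, %handler) has the usual __builtin_eh_return semantics: tear down the frame, bump the stack pointer by %offset, and return to %handler instead of the normal return address. The CHECK lines below pin down the Hexagon form of that sequence:

    deallocframe              // restore r31:30 and the caller's frame
    r29 = add(r29, r28)       // apply the eh.return stack adjustment (offset carried in r28)
    jumpr r31                 // transfer to the handler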
+ +; CHECK: deallocframe +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r29 = add(r29, r28) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + +%struct.Data = type { i32, i8* } + +define i32 @test_eh_return(i32 %a, i32 %b) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %d = alloca %struct.Data, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %2 = load i32, i32* %a.addr, align 4 + %3 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %2, %3 + ret i32 %add + +if.else: ; preds = %entry + %call = call i32 @setup(%struct.Data* %d) + %_d1 = getelementptr inbounds %struct.Data, %struct.Data* %d, i32 0, i32 0 + %4 = load i32, i32* %_d1, align 4 + %_d2 = getelementptr inbounds %struct.Data, %struct.Data* %d, i32 0, i32 1 + %5 = load i8*, i8** %_d2, align 4 + call void @llvm.eh.return.i32(i32 %4, i8* %5) + unreachable +} + +declare i32 @setup(%struct.Data*) + +declare void @llvm.eh.return.i32(i32, i8*) nounwind diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll index 7e2ad2a4678e..8919f265abfe 100644 --- a/test/CodeGen/Hexagon/hwloop-lt.ll +++ b/test/CodeGen/Hexagon/hwloop-lt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s +; RUN: llc -march=hexagon -O3 < %s | FileCheck %s ; CHECK-LABEL: @test_pos1_ir_slt ; CHECK: loop0 diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll index 16fe728fa7bc..cf97fffce40a 100644 --- a/test/CodeGen/Hexagon/hwloop-lt1.ll +++ b/test/CodeGen/Hexagon/hwloop-lt1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we generate a hardware loop instruction. 
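For reference, a hardware loop replaces the decrement/compare/branch back edge with a loop0 setup and a final packet tagged :endloop0, which is what the CHECK matches; a minimal sketch (label, registers, and trip count illustrative):

    loop0(.LBB0_1, #100)              // loop start address and trip count
    .LBB0_1:
    { memb(r0++#1) = r2 }:endloop0    // last packet closes the loop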
; CHECK: endloop0 diff --git a/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll b/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll index 37f9f4007b67..fcf80b08181e 100644 --- a/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll +++ b/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll @@ -1,27 +1,30 @@ ; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.1.1 ALU32/ALU +; CHECK-CALL-NOT: call + ; Add declare i32 @llvm.hexagon.A2.addi(i32, i32) define i32 @A2_addi(i32 %a) { %z = call i32 @llvm.hexagon.A2.addi(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(r0, #0) +; CHECK: = add({{.*}}, #0) declare i32 @llvm.hexagon.A2.add(i32, i32) define i32 @A2_add(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.add(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0, r1) +; CHECK: = add({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.addsat(i32, i32) define i32 @A2_addsat(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addsat(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0, r1):sat +; CHECK: = add({{.*}}, {{.*}}):sat ; Logical operations declare i32 @llvm.hexagon.A2.and(i32, i32) @@ -29,43 +32,35 @@ define i32 @A2_and(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.and(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = and(r0, r1) +; CHECK: = and({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.or(i32, i32) define i32 @A2_or(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.or(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = or(r0, r1) +; CHECK: = or({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.xor(i32, i32) define i32 @A2_xor(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.xor(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = xor(r0, r1) +; CHECK: = xor({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.andn(i32, i32) define i32 @A4_andn(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.andn(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = and(r0, ~r1) +; CHECK: = and({{.*}}, ~{{.*}}) declare i32 @llvm.hexagon.A4.orn(i32, i32) define i32 @A4_orn(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.orn(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = or(r0, ~r1) - -; Nop -declare void @llvm.hexagon.A2.nop() -define void @A2_nop(i32 %a, i32 %b) { - call void @llvm.hexagon.A2.nop() - ret void -} -; CHECK: nop +; CHECK: = or({{.*}}, ~{{.*}}) ; Subtract declare i32 @llvm.hexagon.A2.sub(i32, i32) @@ -73,14 +68,14 @@ define i32 @A2_sub(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.sub(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0, r1) +; CHECK: = sub({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.subsat(i32, i32) define i32 @A2_subsat(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subsat(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0, r1):sat +; CHECK: = sub({{.*}}, {{.*}}):sat ; Sign extend declare i32 @llvm.hexagon.A2.sxtb(i32) @@ -88,14 +83,14 @@ define i32 @A2_sxtb(i32 %a) { %z = call i32 @llvm.hexagon.A2.sxtb(i32 %a) ret i32 %z } -; CHECK: r0 = sxtb(r0) +; CHECK: = sxtb({{.*}}) declare i32 @llvm.hexagon.A2.sxth(i32) define i32 @A2_sxth(i32 %a) { %z = call i32 @llvm.hexagon.A2.sxth(i32 %a) ret i32 %z } -; CHECK: r0 = sxth(r0) +; CHECK: = sxth({{.*}}) ; Transfer immediate declare i32 @llvm.hexagon.A2.tfril(i32, i32) @@ -103,21 +98,21 @@ define i32 @A2_tfril(i32 %a) { %z = call i32 @llvm.hexagon.A2.tfril(i32 %a, i32 0) ret i32 %z } -; CHECK: r0.l = #0 +; CHECK: = #0 declare i32 @llvm.hexagon.A2.tfrih(i32, i32) define i32 @A2_tfrih(i32 %a) { %z = call i32 @llvm.hexagon.A2.tfrih(i32 %a, i32 0) ret i32 %z } -; CHECK: r0.h = #0 +; CHECK: = #0 
declare i32 @llvm.hexagon.A2.tfrsi(i32) define i32 @A2_tfrsi() { %z = call i32 @llvm.hexagon.A2.tfrsi(i32 0) ret i32 %z } -; CHECK: r0 = #0 +; CHECK: = #0 ; Transfer register declare i32 @llvm.hexagon.A2.tfr(i32) @@ -125,7 +120,7 @@ define i32 @A2_tfr(i32 %a) { %z = call i32 @llvm.hexagon.A2.tfr(i32 %a) ret i32 %z } -; CHECK: r0 = r0 +; CHECK: = ; Vector add halfwords declare i32 @llvm.hexagon.A2.svaddh(i32, i32) @@ -133,21 +128,21 @@ define i32 @A2_svaddh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svaddh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vaddh(r0, r1) +; CHECK: = vaddh({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.svaddhs(i32, i32) define i32 @A2_svaddhs(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svaddhs(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vaddh(r0, r1):sat +; CHECK: = vaddh({{.*}}, {{.*}}):sat declare i32 @llvm.hexagon.A2.svadduhs(i32, i32) define i32 @A2_svadduhs(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svadduhs(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vadduh(r0, r1):sat +; CHECK: = vadduh({{.*}}, {{.*}}):sat ; Vector average halfwords declare i32 @llvm.hexagon.A2.svavgh(i32, i32) @@ -155,21 +150,21 @@ define i32 @A2_svavgh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svavgh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vavgh(r0, r1) +; CHECK: = vavgh({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.svavghs(i32, i32) define i32 @A2_svavghs(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svavghs(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vavgh(r0, r1):rnd +; CHECK: = vavgh({{.*}}, {{.*}}):rnd declare i32 @llvm.hexagon.A2.svnavgh(i32, i32) define i32 @A2_svnavgh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svnavgh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vnavgh(r0, r1) +; CHECK: = vnavgh({{.*}}, {{.*}}) ; Vector subtract halfwords declare i32 @llvm.hexagon.A2.svsubh(i32, i32) @@ -177,21 +172,21 @@ define i32 @A2_svsubh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svsubh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vsubh(r0, r1) +; CHECK: = vsubh({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.svsubhs(i32, i32) define i32 @A2_svsubhs(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svsubhs(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vsubh(r0, r1):sat +; CHECK: = vsubh({{.*}}, {{.*}}):sat declare i32 @llvm.hexagon.A2.svsubuhs(i32, i32) define i32 @A2_svsubuhs(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.svsubuhs(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vsubuh(r0, r1):sat +; CHECK: = vsubuh({{.*}}, {{.*}}):sat ; Zero extend declare i32 @llvm.hexagon.A2.zxth(i32) @@ -199,4 +194,4 @@ define i32 @A2_zxth(i32 %a) { %z = call i32 @llvm.hexagon.A2.zxth(i32 %a) ret i32 %z } -; CHECK: r0 = zxth(r0) +; CHECK: = zxth({{.*}}) diff --git a/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll b/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll index a9cc01c5dcb0..c9fb0afe0781 100644 --- a/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll +++ b/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll @@ -1,62 +1,65 @@ ; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.1.2 ALU32/PERM +; CHECK-CALL-NOT: call + ; Combine words into doubleword declare i64 @llvm.hexagon.A4.combineri(i32, i32) define i64 @A4_combineri(i32 %a) { %z = call i64 @llvm.hexagon.A4.combineri(i32 %a, i32 0) ret i64 %z } -; CHECK: = combine(r0, #0) +; CHECK: = combine({{.*}}, #0) declare i64 @llvm.hexagon.A4.combineir(i32, i32) define i64 @A4_combineir(i32 %a) { %z = call i64 
@llvm.hexagon.A4.combineir(i32 0, i32 %a) ret i64 %z } -; CHECK: = combine(#0, r0) +; CHECK: = combine(#0, {{.*}}) declare i64 @llvm.hexagon.A2.combineii(i32, i32) define i64 @A2_combineii() { %z = call i64 @llvm.hexagon.A2.combineii(i32 0, i32 0) ret i64 %z } -; CHECK: r1:0 = combine(#0, #0) +; CHECK: = combine(#0, #0) declare i32 @llvm.hexagon.A2.combine.hh(i32, i32) define i32 @A2_combine_hh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.combine.hh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = combine(r0.h, r1.h) +; CHECK: = combine({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.combine.hl(i32, i32) define i32 @A2_combine_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.combine.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = combine(r0.h, r1.l) +; CHECK: = combine({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.combine.lh(i32, i32) define i32 @A2_combine_lh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.combine.lh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = combine(r0.l, r1.h) +; CHECK: = combine({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.combine.ll(i32, i32) define i32 @A2_combine_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.combine.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = combine(r0.l, r1.l) +; CHECK: = combine({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.combinew(i32, i32) define i64 @A2_combinew(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.A2.combinew(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = combine(r0, r1) +; CHECK: = combine({{.*}}, {{.*}}) ; Mux declare i32 @llvm.hexagon.C2.muxri(i32, i32, i32) @@ -64,21 +67,21 @@ define i32 @C2_muxri(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C2.muxri(i32 %a, i32 0, i32 %b) ret i32 %z } -; CHECK: r0 = mux(p0, #0, r1) +; CHECK: = mux({{.*}}, #0, {{.*}}) declare i32 @llvm.hexagon.C2.muxir(i32, i32, i32) define i32 @C2_muxir(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C2.muxir(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 = mux(p0, r1, #0) +; CHECK: = mux({{.*}}, {{.*}}, #0) declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) define i32 @C2_mux(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.C2.mux(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 = mux(p0, r1, r2) +; CHECK: = mux({{.*}}, {{.*}}, {{.*}}) ; Shift word by 16 declare i32 @llvm.hexagon.A2.aslh(i32) @@ -86,14 +89,14 @@ define i32 @A2_aslh(i32 %a) { %z = call i32 @llvm.hexagon.A2.aslh(i32 %a) ret i32 %z } -; CHECK: r0 = aslh(r0) +; CHECK: = aslh({{.*}}) declare i32 @llvm.hexagon.A2.asrh(i32) define i32 @A2_asrh(i32 %a) { %z = call i32 @llvm.hexagon.A2.asrh(i32 %a) ret i32 %z } -; CHECK: r0 = asrh(r0) +; CHECK: = asrh({{.*}}) ; Pack high and low halfwords declare i64 @llvm.hexagon.S2.packhl(i32, i32) @@ -101,4 +104,4 @@ define i64 @S2_packhl(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.packhl(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = packhl(r0, r1) +; CHECK: = packhl({{.*}}, {{.*}}) diff --git a/test/CodeGen/Hexagon/intrinsics/cr.ll b/test/CodeGen/Hexagon/intrinsics/cr.ll index 9bdcb253fe2f..f308ef8e5664 100644 --- a/test/CodeGen/Hexagon/intrinsics/cr.ll +++ b/test/CodeGen/Hexagon/intrinsics/cr.ll @@ -1,20 +1,23 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.2 CR +; CHECK-CALL-NOT: call + ; Corner detection acceleration declare i32 @llvm.hexagon.C4.fastcorner9(i32, i32) define i32 @C4_fastcorner9(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C4.fastcorner9(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = fastcorner9(p0, p1) +; CHECK: = 
fastcorner9({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.fastcorner9.not(i32, i32) define i32 @C4_fastcorner9_not(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C4.fastcorner9.not(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = !fastcorner9(p0, p1) +; CHECK: = !fastcorner9({{.*}}, {{.*}}) ; Logical reductions on predicates declare i32 @llvm.hexagon.C2.any8(i32) @@ -22,7 +25,7 @@ define i32 @C2_any8(i32 %a) { %z = call i32@llvm.hexagon.C2.any8(i32 %a) ret i32 %z } -; CHECK: p0 = any8(p0) +; CHECK: = any8({{.*}}) declare i32 @llvm.hexagon.C2.all8(i32) define i32 @C2_all8(i32 %a) { @@ -30,7 +33,7 @@ define i32 @C2_all8(i32 %a) { ret i32 %z } -; CHECK: p0 = all8(p0) +; CHECK: = all8({{.*}}) ; Logical operations on predicates declare i32 @llvm.hexagon.C2.and(i32, i32) @@ -38,95 +41,95 @@ define i32 @C2_and(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C2.and(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = and(p0, p1) +; CHECK: = and({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.and.and(i32, i32, i32) define i32 @C4_and_and(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.and.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = and(p0, and(p1, p2)) +; CHECK: = and({{.*}}, and({{.*}}, {{.*}})) declare i32 @llvm.hexagon.C2.or(i32, i32) define i32 @C2_or(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C2.or(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = or(p0, p1) +; CHECK: = or({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.and.or(i32, i32, i32) define i32 @C4_and_or(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.and.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = and(p0, or(p1, p2)) +; CHECK: = and({{.*}}, or({{.*}}, {{.*}})) declare i32 @llvm.hexagon.C2.xor(i32, i32) define i32 @C2_xor(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C2.xor(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = xor(p0, p1) +; CHECK: = xor({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.or.and(i32, i32, i32) define i32 @C4_or_and(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.or.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = or(p0, and(p1, p2)) +; CHECK: = or({{.*}}, and({{.*}}, {{.*}})) declare i32 @llvm.hexagon.C2.andn(i32, i32) define i32 @C2_andn(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C2.andn(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = and(p0, !p1) +; CHECK: = and({{.*}}, !{{.*}}) declare i32 @llvm.hexagon.C4.or.or(i32, i32, i32) define i32 @C4_or_or(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.or.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = or(p0, or(p1, p2)) +; CHECK: = or({{.*}}, or({{.*}}, {{.*}})) declare i32 @llvm.hexagon.C4.and.andn(i32, i32, i32) define i32 @C4_and_andn(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.and.andn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = and(p0, and(p1, !p2)) +; CHECK: = and({{.*}}, and({{.*}}, !{{.*}})) declare i32 @llvm.hexagon.C4.and.orn(i32, i32, i32) define i32 @C4_and_orn(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.and.orn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = and(p0, or(p1, !p2)) +; CHECK: = and({{.*}}, or({{.*}}, !{{.*}})) declare i32 @llvm.hexagon.C2.not(i32) define i32 @C2_not(i32 %a) { %z = call i32@llvm.hexagon.C2.not(i32 %a) ret i32 %z } -; CHECK: p0 = not(p0) +; CHECK: = not({{.*}}) declare i32 @llvm.hexagon.C4.or.andn(i32, i32, i32) define i32 @C4_or_andn(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.or.andn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = or(p0, and(p1, !p2)) +; CHECK: = or({{.*}}, and({{.*}}, !{{.*}})) declare i32 @llvm.hexagon.C2.orn(i32, i32) 
define i32 @C2_orn(i32 %a, i32 %b) { %z = call i32@llvm.hexagon.C2.orn(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = or(p0, !p1) +; CHECK: = or({{.*}}, !{{.*}}) declare i32 @llvm.hexagon.C4.or.orn(i32, i32, i32) define i32 @C4_or_orn(i32 %a, i32 %b, i32 %c) { %z = call i32@llvm.hexagon.C4.or.orn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: p0 = or(p0, or(p1, !p2)) +; CHECK: = or({{.*}}, or({{.*}}, !{{.*}})) diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll b/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll index 4a11112d73a9..c5c23c22bde9 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll @@ -1,13 +1,17 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | \ +; RUN: FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.10.1 XTYPE/ALU +; CHECK-CALL-NOT: call + ; Absolute value doubleword declare i64 @llvm.hexagon.A2.absp(i64) define i64 @A2_absp(i64 %a) { %z = call i64 @llvm.hexagon.A2.absp(i64 %a) ret i64 %z } -; CHECK: r1:0 = abs(r1:0) +; CHECK: = abs({{.*}}) ; Absolute value word declare i32 @llvm.hexagon.A2.abs(i32) @@ -15,14 +19,14 @@ define i32 @A2_abs(i32 %a) { %z = call i32 @llvm.hexagon.A2.abs(i32 %a) ret i32 %z } -; CHECK: r0 = abs(r0) +; CHECK: = abs({{.*}}) declare i32 @llvm.hexagon.A2.abssat(i32) define i32 @A2_abssat(i32 %a) { %z = call i32 @llvm.hexagon.A2.abssat(i32 %a) ret i32 %z } -; CHECK: r0 = abs(r0):sat +; CHECK: = abs({{.*}}):sat ; Add and accumulate declare i32 @llvm.hexagon.S4.addaddi(i32, i32, i32) @@ -30,42 +34,42 @@ define i32 @S4_addaddi(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.addaddi(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 = add(r0, add(r1, #0)) +; CHECK: = add({{.*}}, add({{.*}}, #0)) declare i32 @llvm.hexagon.S4.subaddi(i32, i32, i32) define i32 @S4_subaddi(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.subaddi(i32 %a, i32 0, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0, sub(#0, r1)) +; CHECK: = add({{.*}}, sub(#0, {{.*}})) declare i32 @llvm.hexagon.M2.accii(i32, i32, i32) define i32 @M2_accii(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.accii(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 += add(r1, #0) +; CHECK: += add({{.*}}, #0) declare i32 @llvm.hexagon.M2.naccii(i32, i32, i32) define i32 @M2_naccii(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.naccii(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 -= add(r1, #0) +; CHECK: -= add({{.*}}, #0) declare i32 @llvm.hexagon.M2.acci(i32, i32, i32) define i32 @M2_acci(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.acci(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += add(r1, r2) +; CHECK: += add({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M2.nacci(i32, i32, i32) define i32 @M2_nacci(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.nacci(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= add(r1, r2) +; CHECK: -= add({{.*}}, {{.*}}) ; Add doublewords declare i64 @llvm.hexagon.A2.addp(i64, i64) @@ -73,14 +77,14 @@ define i64 @A2_addp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.addp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = add(r1:0, r3:2) +; CHECK: = add({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.addpsat(i64, i64) define i64 @A2_addpsat(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.addpsat(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = add(r1:0, r3:2):sat +; CHECK: = add({{.*}}, {{.*}}):sat ; Add halfword declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32) @@ -88,84 +92,84 @@ define i32 
@A2_addh_l16_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.l) +; CHECK: = add({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32) define i32 @A2_addh_l16_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.h) +; CHECK: = add({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32) define i32 @A2_addh_l16_sat.ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.l):sat +; CHECK: = add({{.*}}.l, {{.*}}.l):sat declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32) define i32 @A2_addh_l16_sat.hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.h):sat +; CHECK: = add({{.*}}.l, {{.*}}.h):sat declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32) define i32 @A2_addh_h16_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.l):<<16 +; CHECK: = add({{.*}}.l, {{.*}}.l):<<16 declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32) define i32 @A2_addh_h16_lh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.h):<<16 +; CHECK: = add({{.*}}.l, {{.*}}.h):<<16 declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32) define i32 @A2_addh_h16_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.h, r1.l):<<16 +; CHECK: = add({{.*}}.h, {{.*}}.l):<<16 declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32) define i32 @A2_addh_h16_hh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.h, r1.h):<<16 +; CHECK: = add({{.*}}.h, {{.*}}.h):<<16 declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32) define i32 @A2_addh_h16_sat_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.l):sat:<<16 +; CHECK: = add({{.*}}.l, {{.*}}.l):sat:<<16 declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32) define i32 @A2_addh_h16_sat_lh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.l, r1.h):sat:<<16 +; CHECK: = add({{.*}}.l, {{.*}}.h):sat:<<16 declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32) define i32 @A2_addh_h16_sat_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.h, r1.l):sat:<<16 +; CHECK: = add({{.*}}.h, {{.*}}.l):sat:<<16 declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32) define i32 @A2_addh_h16_sat_hh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0.h, r1.h):sat:<<16 +; CHECK: = add({{.*}}.h, {{.*}}.h):sat:<<16 ; Logical doublewords declare i64 @llvm.hexagon.A2.notp(i64) @@ -173,42 +177,42 @@ define i64 @A2_notp(i64 %a) { %z = call i64 @llvm.hexagon.A2.notp(i64 %a) ret i64 %z } -; CHECK: r1:0 = not(r1:0) +; CHECK: = not({{.*}}) declare i64 @llvm.hexagon.A2.andp(i64, i64) define i64 @A2_andp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.andp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = and(r1:0, r3:2) +; CHECK: = and({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.andnp(i64, i64) define i64 @A2_andnp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A4.andnp(i64 %a, i64 %b) ret i64 %z } -; CHECK: 
r1:0 = and(r1:0, ~r3:2) +; CHECK: = and({{.*}}, ~{{.*}}) declare i64 @llvm.hexagon.A2.orp(i64, i64) define i64 @A2_orp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.orp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = or(r1:0, r3:2) +; CHECK: = or({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.ornp(i64, i64) define i64 @A2_ornp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A4.ornp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = or(r1:0, ~r3:2) +; CHECK: = or({{.*}}, ~{{.*}}) declare i64 @llvm.hexagon.A2.xorp(i64, i64) define i64 @A2_xorp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.xorp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = xor(r1:0, r3:2) +; CHECK: = xor({{.*}}, {{.*}}) ; Logical-logical doublewords declare i64 @llvm.hexagon.M4.xor.xacc(i64, i64, i64) @@ -216,7 +220,7 @@ define i64 @M4_xor_xacc(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M4.xor.xacc(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 ^= xor(r3:2, r5:4) +; CHECK: ^= xor({{.*}}, {{.*}}) ; Logical-logical words declare i32 @llvm.hexagon.S4.or.andi(i32, i32, i32) @@ -224,91 +228,91 @@ define i32 @S4_or_andi(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.or.andi(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 |= and(r1, #0) +; CHECK: |= and({{.*}}, #0) declare i32 @llvm.hexagon.S4.or.andix(i32, i32, i32) define i32 @S4_or_andix(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.or.andix(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r1 = or(r0, and(r1, #0)) +; CHECK: = or({{.*}}, and({{.*}}, #0)) declare i32 @llvm.hexagon.M4.or.andn(i32, i32, i32) define i32 @M4_or_andn(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.or.andn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= and(r1, ~r2) +; CHECK: |= and({{.*}}, ~{{.*}}) declare i32 @llvm.hexagon.M4.and.andn(i32, i32, i32) define i32 @M4_and_andn(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.and.andn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= and(r1, ~r2) +; CHECK: &= and({{.*}}, ~{{.*}}) declare i32 @llvm.hexagon.M4.xor.andn(i32, i32, i32) define i32 @M4_xor_andn(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.xor.andn(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 ^= and(r1, ~r2) +; CHECK: ^= and({{.*}}, ~{{.*}}) declare i32 @llvm.hexagon.M4.and.and(i32, i32, i32) define i32 @M4_and_and(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.and.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= and(r1, r2) +; CHECK: &= and({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.and.or(i32, i32, i32) define i32 @M4_and_or(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.and.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= or(r1, r2) +; CHECK: &= or({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.and.xor(i32, i32, i32) define i32 @M4_and_xor(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.and.xor(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= xor(r1, r2) +; CHECK: &= xor({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.or.and(i32, i32, i32) define i32 @M4_or_and(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.or.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= and(r1, r2) +; CHECK: |= and({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.or.or(i32, i32, i32) define i32 @M4_or_or(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.or.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= or(r1, r2) +; CHECK: |= or({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.or.xor(i32, i32, i32) define i32 @M4_or_xor(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.or.xor(i32 %a, i32 %b, 
i32 %c) ret i32 %z } -; CHECK: r0 |= xor(r1, r2) +; CHECK: |= xor({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.xor.and(i32, i32, i32) define i32 @M4_xor_and(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.xor.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 ^= and(r1, r2) +; CHECK: ^= and({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M4.xor.or(i32, i32, i32) define i32 @M4_xor_or(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.xor.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 ^= or(r1, r2) +; CHECK: ^= or({{.*}}, {{.*}}) ; Maximum words declare i32 @llvm.hexagon.A2.max(i32, i32) @@ -316,14 +320,14 @@ define i32 @A2_max(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.max(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = max(r0, r1) +; CHECK: = max({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.maxu(i32, i32) define i32 @A2_maxu(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.maxu(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = maxu(r0, r1) +; CHECK: = maxu({{.*}}, {{.*}}) ; Maximum doublewords declare i64 @llvm.hexagon.A2.maxp(i64, i64) @@ -331,14 +335,14 @@ define i64 @A2_maxp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.maxp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = max(r1:0, r3:2) +; CHECK: = max({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.maxup(i64, i64) define i64 @A2_maxup(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.maxup(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = maxu(r1:0, r3:2) +; CHECK: = maxu({{.*}}, {{.*}}) ; Minimum words declare i32 @llvm.hexagon.A2.min(i32, i32) @@ -346,14 +350,14 @@ define i32 @A2_min(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.min(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = min(r0, r1) +; CHECK: = min({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.minu(i32, i32) define i32 @A2_minu(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.minu(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = minu(r0, r1) +; CHECK: = minu({{.*}}, {{.*}}) ; Minimum doublewords declare i64 @llvm.hexagon.A2.minp(i64, i64) @@ -361,14 +365,14 @@ define i64 @A2_minp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.minp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = min(r1:0, r3:2) +; CHECK: = min({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.minup(i64, i64) define i64 @A2_minup(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.minup(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = minu(r1:0, r3:2) +; CHECK: = minu({{.*}}, {{.*}}) ; Module wrap declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) @@ -376,7 +380,7 @@ define i32 @A4_modwrapu(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.modwrapu(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = modwrap(r0, r1) +; CHECK: = modwrap({{.*}}, {{.*}}) ; Negate declare i64 @llvm.hexagon.A2.negp(i64) @@ -384,14 +388,14 @@ define i64 @A2_negp(i64 %a) { %z = call i64 @llvm.hexagon.A2.negp(i64 %a) ret i64 %z } -; CHECK: r1:0 = neg(r1:0) +; CHECK: = neg({{.*}}) declare i32 @llvm.hexagon.A2.negsat(i32) define i32 @A2_negsat(i32 %a) { %z = call i32 @llvm.hexagon.A2.negsat(i32 %a) ret i32 %z } -; CHECK: r0 = neg(r0):sat +; CHECK: = neg({{.*}}):sat ; Round declare i32 @llvm.hexagon.A2.roundsat(i64) @@ -399,49 +403,49 @@ define i32 @A2_roundsat(i64 %a) { %z = call i32 @llvm.hexagon.A2.roundsat(i64 %a) ret i32 %z } -; CHECK: r0 = round(r1:0):sat +; CHECK: = round({{.*}}):sat declare i32 @llvm.hexagon.A4.cround.ri(i32, i32) define i32 @A4_cround_ri(i32 %a) { %z = call i32 @llvm.hexagon.A4.cround.ri(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = cround(r0, #0) +; CHECK: = cround({{.*}}, #0) declare i32 @llvm.hexagon.A4.round.ri(i32, i32) define i32 
@A4_round_ri(i32 %a) { %z = call i32 @llvm.hexagon.A4.round.ri(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = round(r0, #0) +; CHECK: = round({{.*}}, #0) declare i32 @llvm.hexagon.A4.round.ri.sat(i32, i32) define i32 @A4_round_ri_sat(i32 %a) { %z = call i32 @llvm.hexagon.A4.round.ri.sat(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = round(r0, #0):sat +; CHECK: = round({{.*}}, #0):sat declare i32 @llvm.hexagon.A4.cround.rr(i32, i32) define i32 @A4_cround_rr(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.cround.rr(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cround(r0, r1) +; CHECK: = cround({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.round.rr(i32, i32) define i32 @A4_round_rr(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.round.rr(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = round(r0, r1) +; CHECK: = round({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.round.rr.sat(i32, i32) define i32 @A4_round_rr_sat(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.round.rr.sat(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = round(r0, r1):sat +; CHECK: = round({{.*}}, {{.*}}):sat ; Subtract doublewords declare i64 @llvm.hexagon.A2.subp(i64, i64) @@ -449,7 +453,7 @@ define i64 @A2_subp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.subp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = sub(r1:0, r3:2) +; CHECK: = sub({{.*}}, {{.*}}) ; Subtract and accumulate declare i32 @llvm.hexagon.M2.subacc(i32, i32, i32) @@ -457,7 +461,7 @@ define i32 @M2_subacc(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.subacc(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += sub(r1, r2) +; CHECK: += sub({{.*}}, {{.*}}) ; Subtract halfwords declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32) @@ -465,84 +469,84 @@ define i32 @A2_subh_l16_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.l) +; CHECK: = sub({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32) define i32 @A2_subh_l16_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.h) +; CHECK: = sub({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32) define i32 @A2_subh_l16_sat.ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.l):sat +; CHECK: = sub({{.*}}.l, {{.*}}.l):sat declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32) define i32 @A2_subh_l16_sat.hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.h):sat +; CHECK: = sub({{.*}}.l, {{.*}}.h):sat declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32) define i32 @A2_subh_h16_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.l):<<16 +; CHECK: = sub({{.*}}.l, {{.*}}.l):<<16 declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32) define i32 @A2_subh_h16_lh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.h):<<16 +; CHECK: = sub({{.*}}.l, {{.*}}.h):<<16 declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32) define i32 @A2_subh_h16_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.h, r1.l):<<16 +; CHECK: = sub({{.*}}.h, {{.*}}.l):<<16 declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32) define i32 @A2_subh_h16_hh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = 
sub(r0.h, r1.h):<<16 +; CHECK: = sub({{.*}}.h, {{.*}}.h):<<16 declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32) define i32 @A2_subh_h16_sat_ll(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.l):sat:<<16 +; CHECK: = sub({{.*}}.l, {{.*}}.l):sat:<<16 declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32) define i32 @A2_subh_h16_sat_lh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.l, r1.h):sat:<<16 +; CHECK: = sub({{.*}}.l, {{.*}}.h):sat:<<16 declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32) define i32 @A2_subh_h16_sat_hl(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.h, r1.l):sat:<<16 +; CHECK: = sub({{.*}}.h, {{.*}}.l):sat:<<16 declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32) define i32 @A2_subh_h16_sat_hh(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = sub(r0.h, r1.h):sat:<<16 +; CHECK: = sub({{.*}}.h, {{.*}}.h):sat:<<16 ; Sign extend word to doubleword declare i64 @llvm.hexagon.A2.sxtw(i32) @@ -550,7 +554,7 @@ define i64 @A2_sxtw(i32 %a) { %z = call i64 @llvm.hexagon.A2.sxtw(i32 %a) ret i64 %z } -; CHECK: = sxtw(r0) +; CHECK: = sxtw({{.*}}) ; Vector absolute value halfwords declare i64 @llvm.hexagon.A2.vabsh(i64) @@ -558,14 +562,14 @@ define i64 @A2_vabsh(i64 %a) { %z = call i64 @llvm.hexagon.A2.vabsh(i64 %a) ret i64 %z } -; CHECK: r1:0 = vabsh(r1:0) +; CHECK: = vabsh({{.*}}) declare i64 @llvm.hexagon.A2.vabshsat(i64) define i64 @A2_vabshsat(i64 %a) { %z = call i64 @llvm.hexagon.A2.vabshsat(i64 %a) ret i64 %z } -; CHECK: r1:0 = vabsh(r1:0):sat +; CHECK: = vabsh({{.*}}):sat ; Vector absolute value words declare i64 @llvm.hexagon.A2.vabsw(i64) @@ -573,14 +577,14 @@ define i64 @A2_vabsw(i64 %a) { %z = call i64 @llvm.hexagon.A2.vabsw(i64 %a) ret i64 %z } -; CHECK: r1:0 = vabsw(r1:0) +; CHECK: = vabsw({{.*}}) declare i64 @llvm.hexagon.A2.vabswsat(i64) define i64 @A2_vabswsat(i64 %a) { %z = call i64 @llvm.hexagon.A2.vabswsat(i64 %a) ret i64 %z } -; CHECK: r1:0 = vabsw(r1:0):sat +; CHECK: = vabsw({{.*}}):sat ; Vector absolute difference halfwords declare i64 @llvm.hexagon.M2.vabsdiffh(i64, i64) @@ -588,7 +592,7 @@ define i64 @M2_vabsdiffh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vabsdiffh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vabsdiffh(r1:0, r3:2) +; CHECK: = vabsdiffh({{.*}}, {{.*}}) ; Vector absolute difference words declare i64 @llvm.hexagon.M2.vabsdiffw(i64, i64) @@ -596,7 +600,7 @@ define i64 @M2_vabsdiffw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vabsdiffw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vabsdiffw(r1:0, r3:2) +; CHECK: = vabsdiffw({{.*}}, {{.*}}) ; Vector add halfwords declare i64 @llvm.hexagon.A2.vaddh(i64, i64) @@ -604,21 +608,21 @@ define i64 @A2_vaddh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddh(r1:0, r3:2) +; CHECK: = vaddh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vaddhs(i64, i64) define i64 @A2_vaddhs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddhs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddh(r1:0, r3:2):sat +; CHECK: = vaddh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.A2.vadduhs(i64, i64) define i64 @A2_vadduhs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vadduhs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vadduh(r1:0, r3:2):sat +; CHECK: = vadduh({{.*}}, {{.*}}):sat ; Vector 
add halfwords with saturate and pack to unsigned bytes declare i32 @llvm.hexagon.A5.vaddhubs(i64, i64) @@ -626,7 +630,7 @@ define i32 @A5_vaddhubs(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A5.vaddhubs(i64 %a, i64 %b) ret i32 %z } -; CHECK: r0 = vaddhub(r1:0, r3:2):sat +; CHECK: = vaddhub({{.*}}, {{.*}}):sat ; Vector reduce add unsigned bytes declare i64 @llvm.hexagon.A2.vraddub(i64, i64) @@ -634,14 +638,14 @@ define i64 @A2_vraddub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vraddub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vraddub(r1:0, r3:2) +; CHECK: = vraddub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vraddub.acc(i64, i64, i64) define i64 @A2_vraddub_acc(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.A2.vraddub.acc(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vraddub(r3:2, r5:4) +; CHECK: += vraddub({{.*}}, {{.*}}) ; Vector reduce add halfwords declare i32 @llvm.hexagon.M2.vradduh(i64, i64) @@ -649,14 +653,14 @@ define i32 @M2_vradduh(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.M2.vradduh(i64 %a, i64 %b) ret i32 %z } -; CHECK: r0 = vradduh(r1:0, r3:2) +; CHECK: = vradduh({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M2.vraddh(i64, i64) define i32 @M2_vraddh(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.M2.vraddh(i64 %a, i64 %b) ret i32 %z } -; CHECK: r0 = vraddh(r1:0, r3:2) +; CHECK: = vraddh({{.*}}, {{.*}}) ; Vector add bytes declare i64 @llvm.hexagon.A2.vaddub(i64, i64) @@ -664,14 +668,14 @@ define i64 @A2_vaddub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddub(r1:0, r3:2) +; CHECK: = vaddub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vaddubs(i64, i64) define i64 @A2_vaddubs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddubs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddub(r1:0, r3:2):sat +; CHECK: = vaddub({{.*}}, {{.*}}):sat ; Vector add words declare i64 @llvm.hexagon.A2.vaddw(i64, i64) @@ -679,14 +683,14 @@ define i64 @A2_vaddw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddw(r1:0, r3:2) +; CHECK: = vaddw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vaddws(i64, i64) define i64 @A2_vaddws(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vaddws(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vaddw(r1:0, r3:2):sat +; CHECK: = vaddw({{.*}}, {{.*}}):sat ; Vector average halfwords declare i64 @llvm.hexagon.A2.vavgh(i64, i64) @@ -694,56 +698,56 @@ define i64 @A2_vavgh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgh(r1:0, r3:2) +; CHECK: = vavgh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vavghr(i64, i64) define i64 @A2_vavghr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavghr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgh(r1:0, r3:2):rnd +; CHECK: = vavgh({{.*}}, {{.*}}):rnd declare i64 @llvm.hexagon.A2.vavghcr(i64, i64) define i64 @A2_vavghcr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavghcr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgh(r1:0, r3:2):crnd +; CHECK: = vavgh({{.*}}, {{.*}}):crnd declare i64 @llvm.hexagon.A2.vavguh(i64, i64) define i64 @A2_vavguh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavguh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavguh(r1:0, r3:2) +; CHECK: = vavguh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vavguhr(i64, i64) define i64 @A2_vavguhr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavguhr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavguh(r1:0, r3:2):rnd +; CHECK: = vavguh({{.*}}, {{.*}}):rnd declare 
i64 @llvm.hexagon.A2.vnavgh(i64, i64) define i64 @A2_vnavgh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavgh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgh(r1:0, r3:2) +; CHECK: = vnavgh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vnavghr(i64, i64) define i64 @A2_vnavghr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavghr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgh(r1:0, r3:2):rnd +; CHECK: = vnavgh({{.*}}, {{.*}}):rnd declare i64 @llvm.hexagon.A2.vnavghcr(i64, i64) define i64 @A2_vnavghcr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavghcr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgh(r1:0, r3:2):crnd +; CHECK: = vnavgh({{.*}}, {{.*}}):crnd ; Vector average unsigned bytes declare i64 @llvm.hexagon.A2.vavgub(i64, i64) @@ -751,14 +755,14 @@ define i64 @A2_vavgub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgub(r1:0, r3:2) +; CHECK: = vavgub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vavgubr(i64, i64) define i64 @A2_vavgubr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgubr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgub(r1:0, r3:2):rnd +; CHECK: = vavgub({{.*}}, {{.*}}):rnd ; Vector average words declare i64 @llvm.hexagon.A2.vavgw(i64, i64) @@ -766,56 +770,56 @@ define i64 @A2_vavgw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgw(r1:0, r3:2) +; CHECK: = vavgw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vavgwr(i64, i64) define i64 @A2_vavgwr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgwr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgw(r1:0, r3:2):rnd +; CHECK: = vavgw({{.*}}, {{.*}}):rnd declare i64 @llvm.hexagon.A2.vavgwcr(i64, i64) define i64 @A2_vavgwcr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavgwcr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavgw(r1:0, r3:2):crnd +; CHECK: = vavgw({{.*}}, {{.*}}):crnd declare i64 @llvm.hexagon.A2.vavguw(i64, i64) define i64 @A2_vavguw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavguw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavguw(r1:0, r3:2) +; CHECK: = vavguw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vavguwr(i64, i64) define i64 @A2_vavguwr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vavguwr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vavguw(r1:0, r3:2):rnd +; CHECK: = vavguw({{.*}}, {{.*}}):rnd declare i64 @llvm.hexagon.A2.vnavgw(i64, i64) define i64 @A2_vnavgw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavgw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgw(r1:0, r3:2) +; CHECK: = vnavgw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vnavgwr(i64, i64) define i64 @A2_vnavgwr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavgwr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgw(r1:0, r3:2):rnd +; CHECK: = vnavgw({{.*}}, {{.*}}):rnd declare i64 @llvm.hexagon.A2.vnavgwcr(i64, i64) define i64 @A2_vnavgwcr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vnavgwcr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vnavgw(r1:0, r3:2):crnd +; CHECK: = vnavgw({{.*}}, {{.*}}):crnd ; Vector conditional negate declare i64 @llvm.hexagon.S2.vcnegh(i64, i32) @@ -823,14 +827,14 @@ define i64 @S2_vcnegh(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.vcnegh(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vcnegh(r1:0, r2) +; CHECK: = vcnegh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.vrcnegh(i64, i64, i32) define i64 @S2_vrcnegh(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.vrcnegh(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0
+= vrcnegh(r3:2, r4) +; CHECK: += vrcnegh({{.*}}, {{.*}}) ; Vector maximum bytes declare i64 @llvm.hexagon.A2.vmaxub(i64, i64) @@ -838,14 +842,14 @@ define i64 @A2_vmaxub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vmaxub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmaxub(r1:0, r3:2) +; CHECK: = vmaxub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vmaxb(i64, i64) define i64 @A2_vmaxb(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vmaxb(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmaxb(r1:0, r3:2) +; CHECK: = vmaxb({{.*}}, {{.*}}) ; Vector maximum halfwords declare i64 @llvm.hexagon.A2.vmaxh(i64, i64) @@ -853,14 +857,14 @@ define i64 @A2_vmaxh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vmaxh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmaxh(r1:0, r3:2) +; CHECK: = vmaxh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vmaxuh(i64, i64) define i64 @A2_vmaxuh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vmaxuh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmaxuh(r1:0, r3:2) +; CHECK: = vmaxuh({{.*}}, {{.*}}) ; Vector reduce maximum halfwords declare i64 @llvm.hexagon.A4.vrmaxh(i64, i64, i32) @@ -868,14 +872,14 @@ define i64 @A4_vrmaxh(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrmaxh(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrmaxh(r3:2, r4) +; CHECK: = vrmaxh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.vrmaxuh(i64, i64, i32) define i64 @A4_vrmaxuh(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrmaxuh(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrmaxuh(r3:2, r4) +; CHECK: = vrmaxuh({{.*}}, {{.*}}) ; Vector reduce maximum words declare i64 @llvm.hexagon.A4.vrmaxw(i64, i64, i32) @@ -883,14 +887,14 @@ define i64 @A4_vrmaxw(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrmaxw(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrmaxw(r3:2, r4) +; CHECK: = vrmaxw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.vrmaxuw(i64, i64, i32) define i64 @A4_vrmaxuw(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrmaxuw(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrmaxuw(r3:2, r4) +; CHECK: = vrmaxuw({{.*}}, {{.*}}) ; Vector minimum bytes declare i64 @llvm.hexagon.A2.vminub(i64, i64) @@ -898,14 +902,14 @@ define i64 @A2_vminub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vminub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vminub(r1:0, r3:2) +; CHECK: = vminub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vminb(i64, i64) define i64 @A2_vminb(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vminb(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vminb(r1:0, r3:2) +; CHECK: = vminb({{.*}}, {{.*}}) ; Vector minimum halfwords declare i64 @llvm.hexagon.A2.vminh(i64, i64) @@ -913,14 +917,14 @@ define i64 @A2_vminh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vminh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vminh(r1:0, r3:2) +; CHECK: = vminh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vminuh(i64, i64) define i64 @A2_vminuh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vminuh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vminuh(r1:0, r3:2) +; CHECK: = vminuh({{.*}}, {{.*}}) ; Vector reduce minimum halfwords declare i64 @llvm.hexagon.A4.vrminh(i64, i64, i32) @@ -928,14 +932,14 @@ define i64 @A4_vrminh(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrminh(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrminh(r3:2, r4) +; CHECK: = vrminh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.vrminuh(i64, i64, i32) define i64 @A4_vrminuh(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrminuh(i64 %a,
i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrminuh(r3:2, r4) +; CHECK: = vrminuh({{.*}}, {{.*}}) ; Vector reduce minimum words declare i64 @llvm.hexagon.A4.vrminw(i64, i64, i32) @@ -943,14 +947,14 @@ define i64 @A4_vrminw(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrminw(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrminw(r3:2, r4) +; CHECK: = vrminw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A4.vrminuw(i64, i64, i32) define i64 @A4_vrminuw(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.A4.vrminuw(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 = vrminuw(r3:2, r4) +; CHECK: = vrminuw({{.*}}, {{.*}}) ; Vector sum of absolute differences unsigned bytes declare i64 @llvm.hexagon.A2.vrsadub(i64, i64) @@ -958,14 +962,14 @@ define i64 @A2_vrsadub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vrsadub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrsadub(r1:0, r3:2) +; CHECK: = vrsadub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vrsadub.acc(i64, i64, i64) define i64 @A2_vrsadub_acc(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.A2.vrsadub.acc(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrsadub(r3:2, r5:4) +; CHECK: += vrsadub({{.*}}, {{.*}}) ; Vector subtract halfwords declare i64 @llvm.hexagon.A2.vsubh(i64, i64) @@ -973,21 +977,21 @@ define i64 @A2_vsubh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubh(r1:0, r3:2) +; CHECK: = vsubh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) define i64 @A2_vsubhs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubhs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubh(r1:0, r3:2):sat +; CHECK: = vsubh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.A2.vsubuhs(i64, i64) define i64 @A2_vsubuhs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubuhs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubuh(r1:0, r3:2):sat +; CHECK: = vsubuh({{.*}}, {{.*}}):sat ; Vector subtract bytes declare i64 @llvm.hexagon.A2.vsubub(i64, i64) @@ -995,14 +999,14 @@ define i64 @A2_vsubub(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubub(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubub(r1:0, r3:2) +; CHECK: = vsubub({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vsububs(i64, i64) define i64 @A2_vsububs(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsububs(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubub(r1:0, r3:2):sat +; CHECK: = vsubub({{.*}}, {{.*}}):sat ; Vector subtract words declare i64 @llvm.hexagon.A2.vsubw(i64, i64) @@ -1010,11 +1014,11 @@ define i64 @A2_vsubw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubw(r1:0, r3:2) +; CHECK: = vsubw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.A2.vsubws(i64, i64) define i64 @A2_vsubws(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.A2.vsubws(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vsubw(r1:0, r3:2):sat +; CHECK: = vsubw({{.*}}, {{.*}}):sat diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll b/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll index 8531b2f9334b..e8f83d01820a 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll @@ -1,69 +1,72 @@ ; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.10.2 XTYPE/BIT +; CHECK-CALL-NOT: call + ; Count leading declare i32 @llvm.hexagon.S2.clbp(i64) define i32 @S2_clbp(i64 %a) { %z = call i32 @llvm.hexagon.S2.clbp(i64 %a) ret 
i32 %z } -; CHECK: r0 = clb(r1:0) +; CHECK: = clb({{.*}}) declare i32 @llvm.hexagon.S2.cl0p(i64) define i32 @S2_cl0p(i64 %a) { %z = call i32 @llvm.hexagon.S2.cl0p(i64 %a) ret i32 %z } -; CHECK: r0 = cl0(r1:0) +; CHECK: = cl0({{.*}}) declare i32 @llvm.hexagon.S2.cl1p(i64) define i32 @S2_cl1p(i64 %a) { %z = call i32 @llvm.hexagon.S2.cl1p(i64 %a) ret i32 %z } -; CHECK: r0 = cl1(r1:0) +; CHECK: = cl1({{.*}}) declare i32 @llvm.hexagon.S4.clbpnorm(i64) define i32 @S4_clbpnorm(i64 %a) { %z = call i32 @llvm.hexagon.S4.clbpnorm(i64 %a) ret i32 %z } -; CHECK: r0 = normamt(r1:0) +; CHECK: = normamt({{.*}}) declare i32 @llvm.hexagon.S4.clbpaddi(i64, i32) define i32 @S4_clbpaddi(i64 %a) { %z = call i32 @llvm.hexagon.S4.clbpaddi(i64 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(clb(r1:0), #0) +; CHECK: = add(clb({{.*}}), #0) declare i32 @llvm.hexagon.S4.clbaddi(i32, i32) define i32 @S4_clbaddi(i32 %a) { %z = call i32 @llvm.hexagon.S4.clbaddi(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(clb(r0), #0) +; CHECK: = add(clb({{.*}}), #0) declare i32 @llvm.hexagon.S2.cl0(i32) define i32 @S2_cl0(i32 %a) { %z = call i32 @llvm.hexagon.S2.cl0(i32 %a) ret i32 %z } -; CHECK: r0 = cl0(r0) +; CHECK: = cl0({{.*}}) declare i32 @llvm.hexagon.S2.cl1(i32) define i32 @S2_cl1(i32 %a) { %z = call i32 @llvm.hexagon.S2.cl1(i32 %a) ret i32 %z } -; CHECK: r0 = cl1(r0) +; CHECK: = cl1({{.*}}) declare i32 @llvm.hexagon.S2.clbnorm(i32) define i32 @S4_clbnorm(i32 %a) { %z = call i32 @llvm.hexagon.S2.clbnorm(i32 %a) ret i32 %z } -; CHECK: r0 = normamt(r0) +; CHECK: = normamt({{.*}}) ; Count population declare i32 @llvm.hexagon.S5.popcountp(i64) @@ -71,7 +74,7 @@ define i32 @S5_popcountp(i64 %a) { %z = call i32 @llvm.hexagon.S5.popcountp(i64 %a) ret i32 %z } -; CHECK: r0 = popcount(r1:0) +; CHECK: = popcount({{.*}}) ; Count trailing declare i32 @llvm.hexagon.S2.ct0p(i64) @@ -79,28 +82,28 @@ define i32 @S2_ct0p(i64 %a) { %z = call i32 @llvm.hexagon.S2.ct0p(i64 %a) ret i32 %z } -; CHECK: r0 = ct0(r1:0) +; CHECK: = ct0({{.*}}) declare i32 @llvm.hexagon.S2.ct1p(i64) define i32 @S2_ct1p(i64 %a) { %z = call i32 @llvm.hexagon.S2.ct1p(i64 %a) ret i32 %z } -; CHECK: r0 = ct1(r1:0) +; CHECK: = ct1({{.*}}) declare i32 @llvm.hexagon.S2.ct0(i32) define i32 @S2_ct0(i32 %a) { %z = call i32 @llvm.hexagon.S2.ct0(i32 %a) ret i32 %z } -; CHECK: r0 = ct0(r0) +; CHECK: = ct0({{.*}}) declare i32 @llvm.hexagon.S2.ct1(i32) define i32 @S2_ct1(i32 %a) { %z = call i32 @llvm.hexagon.S2.ct1(i32 %a) ret i32 %z } -; CHECK: r0 = ct1(r0) +; CHECK: = ct1({{.*}}) ; Extract bitfield declare i64 @llvm.hexagon.S2.extractup(i64, i32, i32) @@ -108,56 +111,56 @@ define i64 @S2_extractup(i64 %a) { %z = call i64 @llvm.hexagon.S2.extractup(i64 %a, i32 0, i32 0) ret i64 %z } -; CHECK: r1:0 = extractu(r1:0, #0, #0) +; CHECK: = extractu({{.*}}, #0, #0) declare i64 @llvm.hexagon.S4.extractp(i64, i32, i32) define i64 @S2_extractp(i64 %a) { %z = call i64 @llvm.hexagon.S4.extractp(i64 %a, i32 0, i32 0) ret i64 %z } -; CHECK: r1:0 = extract(r1:0, #0, #0) +; CHECK: = extract({{.*}}, #0, #0) declare i32 @llvm.hexagon.S2.extractu(i32, i32, i32) define i32 @S2_extractu(i32 %a) { %z = call i32 @llvm.hexagon.S2.extractu(i32 %a, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = extractu(r0, #0, #0) +; CHECK: = extractu({{.*}}, #0, #0) declare i32 @llvm.hexagon.S4.extract(i32, i32, i32) define i32 @S2_extract(i32 %a) { %z = call i32 @llvm.hexagon.S4.extract(i32 %a, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = extract(r0, #0, #0) +; CHECK: = extract({{.*}}, #0, #0) declare i64 
@llvm.hexagon.S2.extractup.rp(i64, i64) define i64 @S2_extractup_rp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.extractup.rp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = extractu(r1:0, r3:2) +; CHECK: = extractu({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S4.extractp.rp(i64, i64) define i64 @S4_extractp_rp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.extractp.rp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = extract(r1:0, r3:2) +; CHECK: = extract({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.extractu.rp(i32, i64) define i32 @S2_extractu_rp(i32 %a, i64 %b) { %z = call i32 @llvm.hexagon.S2.extractu.rp(i32 %a, i64 %b) ret i32 %z } -; CHECK: r0 = extractu(r0, r3:2) +; CHECK: = extractu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S4.extract.rp(i32, i64) define i32 @S4_extract_rp(i32 %a, i64 %b) { %z = call i32 @llvm.hexagon.S4.extract.rp(i32 %a, i64 %b) ret i32 %z } -; CHECK: r0 = extract(r0, r3:2) +; CHECK: = extract({{.*}}, {{.*}}) ; Insert bitfield declare i64 @llvm.hexagon.S2.insertp(i64, i64, i32, i32) @@ -165,28 +168,28 @@ define i64 @S2_insertp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.insertp(i64 %a, i64 %b, i32 0, i32 0) ret i64 %z } -; CHECK: r1:0 = insert(r3:2, #0, #0) +; CHECK: = insert({{.*}}, #0, #0) declare i32 @llvm.hexagon.S2.insert(i32, i32, i32, i32) define i32 @S2_insert(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.insert(i32 %a, i32 %b, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = insert(r1, #0, #0) +; CHECK: = insert({{.*}}, #0, #0) declare i32 @llvm.hexagon.S2.insert.rp(i32, i32, i64) define i32 @S2_insert_rp(i32 %a, i32 %b, i64 %c) { %z = call i32 @llvm.hexagon.S2.insert.rp(i32 %a, i32 %b, i64 %c) ret i32 %z } -; CHECK: r0 = insert(r1, r3:2) +; CHECK: = insert({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.insertp.rp(i64, i64, i64) define i64 @S2_insertp_rp(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.S2.insertp.rp(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 = insert(r3:2, r5:4) +; CHECK: = insert({{.*}}, {{.*}}) ; Interleave/deinterleave declare i64 @llvm.hexagon.S2.deinterleave(i64) @@ -194,14 +197,14 @@ define i64 @S2_deinterleave(i64 %a) { %z = call i64 @llvm.hexagon.S2.deinterleave(i64 %a) ret i64 %z } -; CHECK: r1:0 = deinterleave(r1:0) +; CHECK: = deinterleave({{.*}}) declare i64 @llvm.hexagon.S2.interleave(i64) define i64 @S2_interleave(i64 %a) { %z = call i64 @llvm.hexagon.S2.interleave(i64 %a) ret i64 %z } -; CHECK: r1:0 = interleave(r1:0) +; CHECK: = interleave({{.*}}) ; Linear feedback-shift operation declare i64 @llvm.hexagon.S2.lfsp(i64, i64) @@ -209,7 +212,7 @@ define i64 @S2_lfsp(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lfsp(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = lfs(r1:0, r3:2) +; CHECK: = lfs({{.*}}, {{.*}}) ; Masked parity declare i32 @llvm.hexagon.S2.parityp(i64, i64) @@ -217,14 +220,14 @@ define i32 @S2_parityp(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.S2.parityp(i64 %a, i64 %b) ret i32 %z } -; CHECK: r0 = parity(r1:0, r3:2) +; CHECK: = parity({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S4.parity(i32, i32) define i32 @S4_parity(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.parity(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = parity(r0, r1) +; CHECK: = parity({{.*}}, {{.*}}) ; Bit reverse declare i64 @llvm.hexagon.S2.brevp(i64) @@ -232,14 +235,14 @@ define i64 @S2_brevp(i64 %a) { %z = call i64 @llvm.hexagon.S2.brevp(i64 %a) ret i64 %z } -; CHECK: r1:0 = brev(r1:0) +; CHECK: = brev({{.*}}) declare i32 @llvm.hexagon.S2.brev(i32) define i32 @S2_brev(i32 %a) { %z = call i32 @llvm.hexagon.S2.brev(i32 %a) ret i32
%z } -; CHECK: r0 = brev(r0) +; CHECK: = brev({{.*}}) ; Set/clear/toggle bit declare i32 @llvm.hexagon.S2.setbit.i(i32, i32) @@ -247,42 +250,42 @@ define i32 @S2_setbit_i(i32 %a) { %z = call i32 @llvm.hexagon.S2.setbit.i(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = setbit(r0, #0) +; CHECK: = setbit({{.*}}, #0) declare i32 @llvm.hexagon.S2.clrbit.i(i32, i32) define i32 @S2_clrbit_i(i32 %a) { %z = call i32 @llvm.hexagon.S2.clrbit.i(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = clrbit(r0, #0) +; CHECK: = clrbit({{.*}}, #0) declare i32 @llvm.hexagon.S2.togglebit.i(i32, i32) define i32 @S2_togglebit_i(i32 %a) { %z = call i32 @llvm.hexagon.S2.togglebit.i(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = togglebit(r0, #0) +; CHECK: = togglebit({{.*}}, #0) declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) define i32 @S2_setbit_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.setbit.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = setbit(r0, r1) +; CHECK: = setbit({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.clrbit.r(i32, i32) define i32 @S2_clrbit_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.clrbit.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = clrbit(r0, r1) +; CHECK: = clrbit({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.togglebit.r(i32, i32) define i32 @S2_togglebit_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.togglebit.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = togglebit(r0, r1) +; CHECK: = togglebit({{.*}}, {{.*}}) ; Split bitfield declare i64 @llvm.hexagon.A4.bitspliti(i32, i32) @@ -290,14 +293,14 @@ define i64 @A4_bitspliti(i32 %a) { %z = call i64 @llvm.hexagon.A4.bitspliti(i32 %a, i32 0) ret i64 %z } -; CHECK: = bitsplit(r0, #0) +; CHECK: = bitsplit({{.*}}, #0) declare i64 @llvm.hexagon.A4.bitsplit(i32, i32) define i64 @A4_bitsplit(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.A4.bitsplit(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = bitsplit(r0, r1) +; CHECK: = bitsplit({{.*}}, {{.*}}) ; Table index declare i32 @llvm.hexagon.S2.tableidxb.goodsyntax(i32, i32, i32, i32) @@ -305,25 +308,25 @@ define i32 @S2_tableidxb_goodsyntax(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.tableidxb.goodsyntax(i32 %a, i32 %b, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = tableidxb(r1, #0, #0) +; CHECK: = tableidxb({{.*}}, #0, #0) declare i32 @llvm.hexagon.S2.tableidxh.goodsyntax(i32, i32, i32, i32) define i32 @S2_tableidxh_goodsyntax(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.tableidxh.goodsyntax(i32 %a, i32 %b, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = tableidxh(r1, #0, #-1) +; CHECK: = tableidxh({{.*}}, #0, #-1) declare i32 @llvm.hexagon.S2.tableidxw.goodsyntax(i32, i32, i32, i32) define i32 @S2_tableidxw_goodsyntax(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.tableidxw.goodsyntax(i32 %a, i32 %b, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = tableidxw(r1, #0, #-2) +; CHECK: = tableidxw({{.*}}, #0, #-2) declare i32 @llvm.hexagon.S2.tableidxd.goodsyntax(i32, i32, i32, i32) define i32 @S2_tableidxd_goodsyntax(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.tableidxd.goodsyntax(i32 %a, i32 %b, i32 0, i32 0) ret i32 %z } -; CHECK: r0 = tableidxd(r1, #0, #-3) +; CHECK: = tableidxd({{.*}}, #0, #-3) diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll b/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll index 57b0c5b6db56..0087883573ec 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll @@ -1,34 +1,37 @@ ; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon 
Programmer's Reference Manual 11.10.3 XTYPE/COMPLEX +; CHECK-CALL-NOT: call + ; Complex add/sub halfwords declare i64 @llvm.hexagon.S4.vxaddsubh(i64, i64) define i64 @S4_vxaddsubh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxaddsubh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxaddsubh(r1:0, r3:2):sat +; CHECK: = vxaddsubh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.S4.vxsubaddh(i64, i64) define i64 @S4_vxsubaddh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxsubaddh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxsubaddh(r1:0, r3:2):sat +; CHECK: = vxsubaddh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.S4.vxaddsubhr(i64, i64) define i64 @S4_vxaddsubhr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxaddsubhr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxaddsubh(r1:0, r3:2):rnd:>>1:sat +; CHECK: = vxaddsubh({{.*}}, {{.*}}):rnd:>>1:sat declare i64 @llvm.hexagon.S4.vxsubaddhr(i64, i64) define i64 @S4_vxsubaddhr(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxsubaddhr(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxsubaddh(r1:0, r3:2):rnd:>>1:sat +; CHECK: = vxsubaddh({{.*}}, {{.*}}):rnd:>>1:sat ; Complex add/sub words declare i64 @llvm.hexagon.S4.vxaddsubw(i64, i64) @@ -36,14 +39,14 @@ define i64 @S4_vxaddsubw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxaddsubw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxaddsubw(r1:0, r3:2):sat +; CHECK: = vxaddsubw({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.S4.vxsubaddw(i64, i64) define i64 @S4_vxsubaddw(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S4.vxsubaddw(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vxsubaddw(r1:0, r3:2):sat +; CHECK: = vxsubaddw({{.*}}, {{.*}}):sat ; Complex multiply declare i64 @llvm.hexagon.M2.cmpys.s0(i32, i32) @@ -51,84 +54,84 @@ define i64 @M2_cmpys_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpys.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpy(r0, r1):sat +; CHECK: = cmpy({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.cmpys.s1(i32, i32) define i64 @M2_cmpys_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpys.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpy(r0, r1):<<1:sat +; CHECK: = cmpy({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.cmpysc.s0(i32, i32) define i64 @M2_cmpysc_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpysc.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpy(r0, r1*):sat +; CHECK: = cmpy({{.*}}, {{.*}}*):sat declare i64 @llvm.hexagon.M2.cmpysc.s1(i32, i32) define i64 @M2_cmpysc_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpysc.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpy(r0, r1*):<<1:sat +; CHECK: = cmpy({{.*}}, {{.*}}*):<<1:sat declare i64 @llvm.hexagon.M2.cmacs.s0(i64, i32, i32) define i64 @M2_cmacs_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmacs.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpy(r2, r3):sat +; CHECK: += cmpy({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.cmacs.s1(i64, i32, i32) define i64 @M2_cmacs_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmacs.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpy(r2, r3):<<1:sat +; CHECK: += cmpy({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.cnacs.s0(i64, i32, i32) define i64 @M2_cnacs_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cnacs.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= cmpy(r2, r3):sat +; CHECK: -= cmpy({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.cnacs.s1(i64, i32, i32) define i64 @M2_cnacs_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 
@llvm.hexagon.M2.cnacs.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= cmpy(r2, r3):<<1:sat +; CHECK: -= cmpy({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.cmacsc.s0(i64, i32, i32) define i64 @M2_cmacsc_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmacsc.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpy(r2, r3*):sat +; CHECK: += cmpy({{.*}}, {{.*}}*):sat declare i64 @llvm.hexagon.M2.cmacsc.s1(i64, i32, i32) define i64 @M2_cmacsc_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmacsc.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpy(r2, r3*):<<1:sat +; CHECK: += cmpy({{.*}}, {{.*}}*):<<1:sat declare i64 @llvm.hexagon.M2.cnacsc.s0(i64, i32, i32) define i64 @M2_cnacsc_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cnacsc.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= cmpy(r2, r3*):sat +; CHECK: -= cmpy({{.*}}, {{.*}}*):sat declare i64 @llvm.hexagon.M2.cnacsc.s1(i64, i32, i32) define i64 @M2_cnacsc_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cnacsc.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= cmpy(r2, r3*):<<1:sat +; CHECK: -= cmpy({{.*}}, {{.*}}*):<<1:sat ; Complex multiply real or imaginary declare i64 @llvm.hexagon.M2.cmpyi.s0(i32, i32) @@ -136,28 +139,28 @@ define i64 @M2_cmpyi_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpyi.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpyi(r0, r1) +; CHECK: = cmpyi({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.cmpyr.s0(i32, i32) define i64 @M2_cmpyr_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.cmpyr.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = cmpyr(r0, r1) +; CHECK: = cmpyr({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.cmaci.s0(i64, i32, i32) define i64 @M2_cmaci_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmaci.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpyi(r2, r3) +; CHECK: += cmpyi({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.cmacr.s0(i64, i32, i32) define i64 @M2_cmacr_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.cmacr.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += cmpyr(r2, r3) +; CHECK: += cmpyr({{.*}}, {{.*}}) ; Complex multiply with round and pack declare i32 @llvm.hexagon.M2.cmpyrs.s0(i32, i32) @@ -165,28 +168,28 @@ define i32 @M2_cmpyrs_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.cmpyrs.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpy(r0, r1):rnd:sat +; CHECK: = cmpy({{.*}}, {{.*}}):rnd:sat declare i32 @llvm.hexagon.M2.cmpyrs.s1(i32, i32) define i32 @M2_cmpyrs_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.cmpyrs.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpy(r0, r1):<<1:rnd:sat +; CHECK: = cmpy({{.*}}, {{.*}}):<<1:rnd:sat declare i32 @llvm.hexagon.M2.cmpyrsc.s0(i32, i32) define i32 @M2_cmpyrsc_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.cmpyrsc.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpy(r0, r1*):rnd:sat +; CHECK: = cmpy({{.*}}, {{.*}}*):rnd:sat declare i32 @llvm.hexagon.M2.cmpyrsc.s1(i32, i32) define i32 @M2_cmpyrsc_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.cmpyrsc.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpy(r0, r1*):<<1:rnd:sat +; CHECK: = cmpy({{.*}}, {{.*}}*):<<1:rnd:sat ; Complex multiply 32x16 declare i32 @llvm.hexagon.M4.cmpyi.wh(i64, i32) @@ -194,28 +197,28 @@ define i32 @M4_cmpyi_wh(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.cmpyi.wh(i64 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpyiwh(r1:0, r2):<<1:rnd:sat +; CHECK: = cmpyiwh({{.*}}, {{.*}}):<<1:rnd:sat declare i32 
@llvm.hexagon.M4.cmpyi.whc(i64, i32) define i32 @M4_cmpyi_whc(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.cmpyi.whc(i64 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpyiwh(r1:0, r2*):<<1:rnd:sat +; CHECK: = cmpyiwh({{.*}}, {{.*}}*):<<1:rnd:sat declare i32 @llvm.hexagon.M4.cmpyr.wh(i64, i32) define i32 @M4_cmpyr_wh(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.cmpyr.wh(i64 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpyrwh(r1:0, r2):<<1:rnd:sat +; CHECK: = cmpyrwh({{.*}}, {{.*}}):<<1:rnd:sat declare i32 @llvm.hexagon.M4.cmpyr.whc(i64, i32) define i32 @M4_cmpyr_whc(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.cmpyr.whc(i64 %a, i32 %b) ret i32 %z } -; CHECK: r0 = cmpyrwh(r1:0, r2*):<<1:rnd:sat +; CHECK: = cmpyrwh({{.*}}, {{.*}}*):<<1:rnd:sat ; Vector complex multiply real or imaginary declare i64 @llvm.hexagon.M2.vcmpy.s0.sat.r(i64, i64) @@ -223,42 +226,42 @@ define i64 @M2_vcmpy_s0_sat_r(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vcmpy.s0.sat.r(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vcmpyr(r1:0, r3:2):sat +; CHECK: = vcmpyr({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vcmpy.s1.sat.r(i64, i64) define i64 @M2_vcmpy_s1_sat_r(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vcmpy.s1.sat.r(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vcmpyr(r1:0, r3:2):<<1:sat +; CHECK: = vcmpyr({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.vcmpy.s0.sat.i(i64, i64) define i64 @M2_vcmpy_s0_sat_i(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vcmpy.s0.sat.i(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vcmpyi(r1:0, r3:2):sat +; CHECK: = vcmpyi({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vcmpy.s1.sat.i(i64, i64) define i64 @M2_vcmpy_s1_sat_i(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vcmpy.s1.sat.i(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vcmpyi(r1:0, r3:2):<<1:sat +; CHECK: = vcmpyi({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.vcmac.s0.sat.r(i64, i64, i64) define i64 @M2_vcmac_s0_sat_r(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vcmac.s0.sat.r(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vcmpyr(r3:2, r5:4):sat +; CHECK: += vcmpyr({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vcmac.s0.sat.i(i64, i64, i64) define i64 @M2_vcmac_s0_sat_i(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vcmac.s0.sat.i(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vcmpyi(r3:2, r5:4):sat +; CHECK: += vcmpyi({{.*}}, {{.*}}):sat ; Vector complex conjugate declare i64 @llvm.hexagon.A2.vconj(i64) @@ -266,7 +269,7 @@ define i64 @A2_vconj(i64 %a) { %z = call i64 @llvm.hexagon.A2.vconj(i64 %a) ret i64 %z } -; CHECK: r1:0 = vconj(r1:0):sat +; CHECK: = vconj({{.*}}):sat ; Vector complex rotate declare i64 @llvm.hexagon.S2.vcrotate(i64, i32) @@ -274,7 +277,7 @@ define i64 @S2_vcrotate(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.vcrotate(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vcrotate(r1:0, r2) +; CHECK: = vcrotate({{.*}}, {{.*}}) ; Vector reduce complex multiply real or imaginary declare i64 @llvm.hexagon.M2.vrcmpyi.s0(i64, i64) @@ -282,56 +285,56 @@ define i64 @M2_vrcmpyi_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vrcmpyi.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrcmpyi(r1:0, r3:2) +; CHECK: = vrcmpyi({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.vrcmpyr.s0(i64, i64) define i64 @M2_vrcmpyr_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vrcmpyr.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrcmpyr(r1:0, r3:2) +; CHECK: = vrcmpyr({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.vrcmpyi.s0c(i64, i64) define i64
@M2_vrcmpyi_s0c(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vrcmpyi.s0c(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrcmpyi(r1:0, r3:2*) +; CHECK: = vrcmpyi({{.*}}, {{.*}}*) declare i64 @llvm.hexagon.M2.vrcmpyr.s0c(i64, i64) define i64 @M2_vrcmpyr_s0c(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vrcmpyr.s0c(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrcmpyr(r1:0, r3:2*) +; CHECK: = vrcmpyr({{.*}}, {{.*}}*) declare i64 @llvm.hexagon.M2.vrcmaci.s0(i64, i64, i64) define i64 @M2_vrcmaci_s0(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vrcmaci.s0(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrcmpyi(r3:2, r5:4) +; CHECK: += vrcmpyi({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.vrcmacr.s0(i64, i64, i64) define i64 @M2_vrcmacr_s0(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vrcmacr.s0(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrcmpyr(r3:2, r5:4) +; CHECK: += vrcmpyr({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.vrcmaci.s0c(i64, i64, i64) define i64 @M2_vrcmaci_s0c(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vrcmaci.s0c(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrcmpyi(r3:2, r5:4*) +; CHECK: += vrcmpyi({{.*}}, {{.*}}*) declare i64 @llvm.hexagon.M2.vrcmacr.s0c(i64, i64, i64) define i64 @M2_vrcmacr_s0c(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vrcmacr.s0c(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrcmpyr(r3:2, r5:4*) +; CHECK: += vrcmpyr({{.*}}, {{.*}}*) ; Vector reduce complex rotate declare i64 @llvm.hexagon.S4.vrcrotate(i64, i32, i32) @@ -339,11 +342,11 @@ define i64 @S4_vrcrotate(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S4.vrcrotate(i64 %a, i32 %b, i32 0) ret i64 %z } -; CHECK: r1:0 = vrcrotate(r1:0, r2, #0) +; CHECK: = vrcrotate({{.*}}, {{.*}}, #0) declare i64 @llvm.hexagon.S4.vrcrotate.acc(i64, i64, i32, i32) define i64 @S4_vrcrotate_acc(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S4.vrcrotate.acc(i64 %a, i64 %b, i32 %c, i32 0) ret i64 %z } -; CHECK: r1:0 += vrcrotate(r3:2, r4, #0) +; CHECK: += vrcrotate({{.*}}, {{.*}}, #0) diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll b/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll index aef8127d668c..598d0a83206d 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll @@ -1,13 +1,17 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | \ +; RUN: FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.10.4 XTYPE/FP +; CHECK-CALL-NOT: call + ; Floating point addition declare float @llvm.hexagon.F2.sfadd(float, float) define float @F2_sfadd(float %a, float %b) { %z = call float @llvm.hexagon.F2.sfadd(float %a, float %b) ret float %z } -; CHECK: r0 = sfadd(r0, r1) +; CHECK: = sfadd({{.*}}, {{.*}}) ; Classify floating-point value declare i32 @llvm.hexagon.F2.sfclass(float, i32) @@ -15,14 +19,14 @@ define i32 @F2_sfclass(float %a) { %z = call i32 @llvm.hexagon.F2.sfclass(float %a, i32 0) ret i32 %z } -; CHECK: p0 = sfclass(r0, #0) +; CHECK: = sfclass({{.*}}, #0) declare i32 @llvm.hexagon.F2.dfclass(double, i32) define i32 @F2_dfclass(double %a) { %z = call i32 @llvm.hexagon.F2.dfclass(double %a, i32 0) ret i32 %z } -; CHECK: p0 = dfclass(r1:0, #0) +; CHECK: = dfclass({{.*}}, #0) ; Compare floating-point value declare i32 @llvm.hexagon.F2.sfcmpge(float, float) @@ -30,56 +34,56 @@ define i32 @F2_sfcmpge(float %a, float %b) { %z = call i32 @llvm.hexagon.F2.sfcmpge(float %a, float %b) ret i32 %z } -;
CHECK: p0 = sfcmp.ge(r0, r1) +; CHECK: = sfcmp.ge({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.sfcmpuo(float, float) define i32 @F2_sfcmpuo(float %a, float %b) { %z = call i32 @llvm.hexagon.F2.sfcmpuo(float %a, float %b) ret i32 %z } -; CHECK: p0 = sfcmp.uo(r0, r1) +; CHECK: = sfcmp.uo({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.sfcmpeq(float, float) define i32 @F2_sfcmpeq(float %a, float %b) { %z = call i32 @llvm.hexagon.F2.sfcmpeq(float %a, float %b) ret i32 %z } -; CHECK: p0 = sfcmp.eq(r0, r1) +; CHECK: = sfcmp.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.sfcmpgt(float, float) define i32 @F2_sfcmpgt(float %a, float %b) { %z = call i32 @llvm.hexagon.F2.sfcmpgt(float %a, float %b) ret i32 %z } -; CHECK: p0 = sfcmp.gt(r0, r1) +; CHECK: = sfcmp.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.dfcmpge(double, double) define i32 @F2_dfcmpge(double %a, double %b) { %z = call i32 @llvm.hexagon.F2.dfcmpge(double %a, double %b) ret i32 %z } -; CHECK: p0 = dfcmp.ge(r1:0, r3:2) +; CHECK: = dfcmp.ge({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.dfcmpuo(double, double) define i32 @F2_dfcmpuo(double %a, double %b) { %z = call i32 @llvm.hexagon.F2.dfcmpuo(double %a, double %b) ret i32 %z } -; CHECK: p0 = dfcmp.uo(r1:0, r3:2) +; CHECK: = dfcmp.uo({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.dfcmpeq(double, double) define i32 @F2_dfcmpeq(double %a, double %b) { %z = call i32 @llvm.hexagon.F2.dfcmpeq(double %a, double %b) ret i32 %z } -; CHECK: p0 = dfcmp.eq(r1:0, r3:2) +; CHECK: = dfcmp.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.F2.dfcmpgt(double, double) define i32 @F2_dfcmpgt(double %a, double %b) { %z = call i32 @llvm.hexagon.F2.dfcmpgt(double %a, double %b) ret i32 %z } -; CHECK: p0 = dfcmp.gt(r1:0, r3:2) +; CHECK: = dfcmp.gt({{.*}}, {{.*}}) ; Convert floating-point value to other format declare double @llvm.hexagon.F2.conv.sf2df(float) @@ -87,14 +91,14 @@ define double @F2_conv_sf2df(float %a) { %z = call double @llvm.hexagon.F2.conv.sf2df(float %a) ret double %z } -; CHECK: = convert_sf2df(r0) +; CHECK: = convert_sf2df({{.*}}) declare float @llvm.hexagon.F2.conv.df2sf(double) define float @F2_conv_df2sf(double %a) { %z = call float @llvm.hexagon.F2.conv.df2sf(double %a) ret float %z } -; CHECK: r0 = convert_df2sf(r1:0) +; CHECK: = convert_df2sf({{.*}}) ; Convert integer to floating-point value declare double @llvm.hexagon.F2.conv.ud2df(i64) @@ -102,56 +106,56 @@ define double @F2_conv_ud2df(i64 %a) { %z = call double @llvm.hexagon.F2.conv.ud2df(i64 %a) ret double %z } -; CHECK: r1:0 = convert_ud2df(r1:0) +; CHECK: = convert_ud2df({{.*}}) declare double @llvm.hexagon.F2.conv.d2df(i64) define double @F2_conv_d2df(i64 %a) { %z = call double @llvm.hexagon.F2.conv.d2df(i64 %a) ret double %z } -; CHECK: r1:0 = convert_d2df(r1:0) +; CHECK: = convert_d2df({{.*}}) declare double @llvm.hexagon.F2.conv.uw2df(i32) define double @F2_conv_uw2df(i32 %a) { %z = call double @llvm.hexagon.F2.conv.uw2df(i32 %a) ret double %z } -; CHECK: = convert_uw2df(r0) +; CHECK: = convert_uw2df({{.*}}) declare double @llvm.hexagon.F2.conv.w2df(i32) define double @F2_conv_w2df(i32 %a) { %z = call double @llvm.hexagon.F2.conv.w2df(i32 %a) ret double %z } -; CHECK: = convert_w2df(r0) +; CHECK: = convert_w2df({{.*}}) declare float @llvm.hexagon.F2.conv.ud2sf(i64) define float @F2_conv_ud2sf(i64 %a) { %z = call float @llvm.hexagon.F2.conv.ud2sf(i64 %a) ret float %z } -; CHECK: r0 = convert_ud2sf(r1:0) +; CHECK: = convert_ud2sf({{.*}}) declare float @llvm.hexagon.F2.conv.d2sf(i64) define float @F2_conv_d2sf(i64 %a) { %z = call 
float @llvm.hexagon.F2.conv.d2sf(i64 %a) ret float %z } -; CHECK: r0 = convert_d2sf(r1:0) +; CHECK: = convert_d2sf({{.*}}) declare float @llvm.hexagon.F2.conv.uw2sf(i32) define float @F2_conv_uw2sf(i32 %a) { %z = call float @llvm.hexagon.F2.conv.uw2sf(i32 %a) ret float %z } -; CHECK: r0 = convert_uw2sf(r0) +; CHECK: = convert_uw2sf({{.*}}) declare float @llvm.hexagon.F2.conv.w2sf(i32) define float @F2_conv_w2sf(i32 %a) { %z = call float @llvm.hexagon.F2.conv.w2sf(i32 %a) ret float %z } -; CHECK: r0 = convert_w2sf(r0) +; CHECK: = convert_w2sf({{.*}}) ; Convert floating-point value to integer declare i64 @llvm.hexagon.F2.conv.df2d(double) @@ -159,112 +163,112 @@ define i64 @F2_conv_df2d(double %a) { %z = call i64 @llvm.hexagon.F2.conv.df2d(double %a) ret i64 %z } -; CHECK: r1:0 = convert_df2d(r1:0) +; CHECK: = convert_df2d({{.*}}) declare i64 @llvm.hexagon.F2.conv.df2ud(double) define i64 @F2_conv_df2ud(double %a) { %z = call i64 @llvm.hexagon.F2.conv.df2ud(double %a) ret i64 %z } -; CHECK: r1:0 = convert_df2ud(r1:0) +; CHECK: = convert_df2ud({{.*}}) declare i64 @llvm.hexagon.F2.conv.df2d.chop(double) define i64 @F2_conv_df2d_chop(double %a) { %z = call i64 @llvm.hexagon.F2.conv.df2d.chop(double %a) ret i64 %z } -; CHECK: r1:0 = convert_df2d(r1:0):chop +; CHECK: = convert_df2d({{.*}}):chop declare i64 @llvm.hexagon.F2.conv.df2ud.chop(double) define i64 @F2_conv_df2ud_chop(double %a) { %z = call i64 @llvm.hexagon.F2.conv.df2ud.chop(double %a) ret i64 %z } -; CHECK: r1:0 = convert_df2ud(r1:0):chop +; CHECK: = convert_df2ud({{.*}}):chop declare i64 @llvm.hexagon.F2.conv.sf2ud(float) define i64 @F2_conv_sf2ud(float %a) { %z = call i64 @llvm.hexagon.F2.conv.sf2ud(float %a) ret i64 %z } -; CHECK: = convert_sf2ud(r0) +; CHECK: = convert_sf2ud({{.*}}) declare i64 @llvm.hexagon.F2.conv.sf2d(float) define i64 @F2_conv_sf2d(float %a) { %z = call i64 @llvm.hexagon.F2.conv.sf2d(float %a) ret i64 %z } -; CHECK: = convert_sf2d(r0) +; CHECK: = convert_sf2d({{.*}}) declare i64 @llvm.hexagon.F2.conv.sf2d.chop(float) define i64 @F2_conv_sf2d_chop(float %a) { %z = call i64 @llvm.hexagon.F2.conv.sf2d.chop(float %a) ret i64 %z } -; CHECK: = convert_sf2d(r0):chop +; CHECK: = convert_sf2d({{.*}}):chop declare i64 @llvm.hexagon.F2.conv.sf2ud.chop(float) define i64 @F2_conv_sf2ud_chop(float %a) { %z = call i64 @llvm.hexagon.F2.conv.sf2ud.chop(float %a) ret i64 %z } -; CHECK: = convert_sf2ud(r0):chop +; CHECK: = convert_sf2ud({{.*}}):chop declare i32 @llvm.hexagon.F2.conv.df2uw(double) define i32 @F2_conv_df2uw(double %a) { %z = call i32 @llvm.hexagon.F2.conv.df2uw(double %a) ret i32 %z } -; CHECK: r0 = convert_df2uw(r1:0) +; CHECK: = convert_df2uw({{.*}}) declare i32 @llvm.hexagon.F2.conv.df2w(double) define i32 @F2_conv_df2w(double %a) { %z = call i32 @llvm.hexagon.F2.conv.df2w(double %a) ret i32 %z } -; CHECK: r0 = convert_df2w(r1:0) +; CHECK: = convert_df2w({{.*}}) declare i32 @llvm.hexagon.F2.conv.df2w.chop(double) define i32 @F2_conv_df2w_chop(double %a) { %z = call i32 @llvm.hexagon.F2.conv.df2w.chop(double %a) ret i32 %z } -; CHECK: r0 = convert_df2w(r1:0):chop +; CHECK: = convert_df2w({{.*}}):chop declare i32 @llvm.hexagon.F2.conv.df2uw.chop(double) define i32 @F2_conv_df2uw_chop(double %a) { %z = call i32 @llvm.hexagon.F2.conv.df2uw.chop(double %a) ret i32 %z } -; CHECK: r0 = convert_df2uw(r1:0):chop +; CHECK: = convert_df2uw({{.*}}):chop declare i32 @llvm.hexagon.F2.conv.sf2uw(float) define i32 @F2_conv_sf2uw(float %a) { %z = call i32 @llvm.hexagon.F2.conv.sf2uw(float %a) ret i32 %z } -; CHECK: r0 =
convert_sf2uw(r0) +; CHECK: = convert_sf2uw({{.*}}) declare i32 @llvm.hexagon.F2.conv.sf2uw.chop(float) define i32 @F2_conv_sf2uw_chop(float %a) { %z = call i32 @llvm.hexagon.F2.conv.sf2uw.chop(float %a) ret i32 %z } -; CHECK: r0 = convert_sf2uw(r0):chop +; CHECK: = convert_sf2uw({{.*}}):chop declare i32 @llvm.hexagon.F2.conv.sf2w(float) define i32 @F2_conv_sf2w(float %a) { %z = call i32 @llvm.hexagon.F2.conv.sf2w(float %a) ret i32 %z } -; CHECK: r0 = convert_sf2w(r0) +; CHECK: = convert_sf2w({{.*}}) declare i32 @llvm.hexagon.F2.conv.sf2w.chop(float) define i32 @F2_conv_sf2w_chop(float %a) { %z = call i32 @llvm.hexagon.F2.conv.sf2w.chop(float %a) ret i32 %z } -; CHECK: r0 = convert_sf2w(r0):chop +; CHECK: = convert_sf2w({{.*}}):chop ; Floating point extreme value assistance declare float @llvm.hexagon.F2.sffixupr(float) @@ -272,21 +276,21 @@ define float @F2_sffixupr(float %a) { %z = call float @llvm.hexagon.F2.sffixupr(float %a) ret float %z } -; CHECK: r0 = sffixupr(r0) +; CHECK: = sffixupr({{.*}}) declare float @llvm.hexagon.F2.sffixupn(float, float) define float @F2_sffixupn(float %a, float %b) { %z = call float @llvm.hexagon.F2.sffixupn(float %a, float %b) ret float %z } -; CHECK: r0 = sffixupn(r0, r1) +; CHECK: = sffixupn({{.*}}, {{.*}}) declare float @llvm.hexagon.F2.sffixupd(float, float) define float @F2_sffixupd(float %a, float %b) { %z = call float @llvm.hexagon.F2.sffixupd(float %a, float %b) ret float %z } -; CHECK: r0 = sffixupd(r0, r1) +; CHECK: = sffixupd({{.*}}, {{.*}}) ; Floating point fused multiply-add declare float @llvm.hexagon.F2.sffma(float, float, float) @@ -294,14 +298,14 @@ define float @F2_sffma(float %a, float %b, float %c) { %z = call float @llvm.hexagon.F2.sffma(float %a, float %b, float %c) ret float %z } -; CHECK: r0 += sfmpy(r1, r2) +; CHECK: += sfmpy({{.*}}, {{.*}}) declare float @llvm.hexagon.F2.sffms(float, float, float) define float @F2_sffms(float %a, float %b, float %c) { %z = call float @llvm.hexagon.F2.sffms(float %a, float %b, float %c) ret float %z } -; CHECK: r0 -= sfmpy(r1, r2) +; CHECK: -= sfmpy({{.*}}, {{.*}}) ; Floating point fused multiply-add with scaling declare float @llvm.hexagon.F2.sffma.sc(float, float, float, i32) @@ -309,7 +313,7 @@ define float @F2_sffma_sc(float %a, float %b, float %c, i32 %d) { %z = call float @llvm.hexagon.F2.sffma.sc(float %a, float %b, float %c, i32 %d) ret float %z } -; CHECK: r0 += sfmpy(r1, r2, p0):scale +; CHECK: += sfmpy({{.*}}, {{.*}}, {{.*}}):scale ; Floating point fused multiply-add for library routines declare float @llvm.hexagon.F2.sffma.lib(float, float, float) @@ -317,14 +321,14 @@ define float @F2_sffma_lib(float %a, float %b, float %c) { %z = call float @llvm.hexagon.F2.sffma.lib(float %a, float %b, float %c) ret float %z } -; CHECK: r0 += sfmpy(r1, r2):lib +; CHECK: += sfmpy({{.*}}, {{.*}}):lib declare float @llvm.hexagon.F2.sffms.lib(float, float, float) define float @F2_sffms_lib(float %a, float %b, float %c) { %z = call float @llvm.hexagon.F2.sffms.lib(float %a, float %b, float %c) ret float %z } -; CHECK: r0 -= sfmpy(r1, r2):lib +; CHECK: -= sfmpy({{.*}}, {{.*}}):lib ; Create floating-point constant declare float @llvm.hexagon.F2.sfimm.p(i32) @@ -332,28 +336,28 @@ define float @F2_sfimm_p() { %z = call float @llvm.hexagon.F2.sfimm.p(i32 0) ret float %z } -; CHECK: r0 = sfmake(#0):pos +; CHECK: = sfmake(#0):pos declare float @llvm.hexagon.F2.sfimm.n(i32) define float @F2_sfimm_n() { %z = call float @llvm.hexagon.F2.sfimm.n(i32 0) ret float %z } -; CHECK: r0 = sfmake(#0):neg +; CHECK: = 
sfmake(#0):neg declare double @llvm.hexagon.F2.dfimm.p(i32) define double @F2_dfimm_p() { %z = call double @llvm.hexagon.F2.dfimm.p(i32 0) ret double %z } -; CHECK: r1:0 = dfmake(#0):pos +; CHECK: = dfmake(#0):pos declare double @llvm.hexagon.F2.dfimm.n(i32) define double @F2_dfimm_n() { %z = call double @llvm.hexagon.F2.dfimm.n(i32 0) ret double %z } -; CHECK: r1:0 = dfmake(#0):neg +; CHECK: = dfmake(#0):neg ; Floating point maximum declare float @llvm.hexagon.F2.sfmax(float, float) @@ -361,7 +365,7 @@ define float @F2_sfmax(float %a, float %b) { %z = call float @llvm.hexagon.F2.sfmax(float %a, float %b) ret float %z } -; CHECK: r0 = sfmax(r0, r1) +; CHECK: = sfmax({{.*}}, {{.*}}) ; Floating point minimum declare float @llvm.hexagon.F2.sfmin(float, float) @@ -369,7 +373,7 @@ define float @F2_sfmin(float %a, float %b) { %z = call float @llvm.hexagon.F2.sfmin(float %a, float %b) ret float %z } -; CHECK: r0 = sfmin(r0, r1) +; CHECK: = sfmin({{.*}}, {{.*}}) ; Floating point multiply declare float @llvm.hexagon.F2.sfmpy(float, float) @@ -377,7 +381,7 @@ define float @F2_sfmpy(float %a, float %b) { %z = call float @llvm.hexagon.F2.sfmpy(float %a, float %b) ret float %z } -; CHECK: r0 = sfmpy(r0, r1) +; CHECK: = sfmpy({{.*}}, {{.*}}) ; Floating point subtraction declare float @llvm.hexagon.F2.sfsub(float, float) @@ -385,4 +389,4 @@ define float @F2_sfsub(float %a, float %b) { %z = call float @llvm.hexagon.F2.sfsub(float %a, float %b) ret float %z } -; CHECK: r0 = sfsub(r0, r1) +; CHECK: = sfsub({{.*}}, {{.*}}) diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll b/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll index 6409e4e10ca2..a1490499fbf6 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll @@ -1,41 +1,45 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | \ +; RUN: FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.10.5 XTYPE/MPY +; CHECK-CALL-NOT: call + ; Multiply and use lower result declare i32 @llvm.hexagon.M4.mpyrr.addi(i32, i32, i32) define i32 @M4_mpyrr_addi(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.mpyrr.addi(i32 0, i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = add(#0, mpyi(r0, r1)) +; CHECK: = add(#0, mpyi({{.*}}, {{.*}})) declare i32 @llvm.hexagon.M4.mpyri.addi(i32, i32, i32) define i32 @M4_mpyri_addi(i32 %a) { %z = call i32 @llvm.hexagon.M4.mpyri.addi(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(#0, mpyi(r0, #0)) +; CHECK: = add(#0, mpyi({{.*}}, #0)) declare i32 @llvm.hexagon.M4.mpyri.addr.u2(i32, i32, i32) define i32 @M4_mpyri_addr_u2(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.mpyri.addr.u2(i32 %a, i32 0, i32 %b) ret i32 %z } -; CHECK: r0 = add(r0, mpyi(#0, r1)) +; CHECK: = add({{.*}}, mpyi(#0, {{.*}})) declare i32 @llvm.hexagon.M4.mpyri.addr(i32, i32, i32) define i32 @M4_mpyri_addr(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M4.mpyri.addr(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 = add(r0, mpyi(r1, #0)) +; CHECK: = add({{.*}}, mpyi({{.*}}, #0)) declare i32 @llvm.hexagon.M4.mpyrr.addr(i32, i32, i32) define i32 @M4_mpyrr_addr(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.mpyrr.addr(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r1 = add(r0, mpyi(r1, r2)) +; CHECK: = add({{.*}}, mpyi({{.*}}, {{.*}})) ; Vector multiply word by signed half (32x16) declare i64 @llvm.hexagon.M2.mmpyl.s0(i64, i64) @@ -43,56 +47,56 @@ define i64 @M2_mmpyl_s0(i64 %a, i64 %b) { %z = call i64 
@llvm.hexagon.M2.mmpyl.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweh(r1:0, r3:2):sat +; CHECK: = vmpyweh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.mmpyl.s1(i64, i64) define i64 @M2_mmpyl_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyl.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweh(r1:0, r3:2):<<1:sat +; CHECK: = vmpyweh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.mmpyh.s0(i64, i64) define i64 @M2_mmpyh_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyh.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywoh(r1:0, r3:2):sat +; CHECK: = vmpywoh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.mmpyh.s1(i64, i64) define i64 @M2_mmpyh_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyh.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywoh(r1:0, r3:2):<<1:sat +; CHECK: = vmpywoh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.mmpyl.rs0(i64, i64) define i64 @M2_mmpyl_rs0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyl.rs0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweh(r1:0, r3:2):rnd:sat +; CHECK: = vmpyweh({{.*}}, {{.*}}):rnd:sat declare i64 @llvm.hexagon.M2.mmpyl.rs1(i64, i64) define i64 @M2_mmpyl_rs1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyl.rs1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweh(r1:0, r3:2):<<1:rnd:sat +; CHECK: = vmpyweh({{.*}}, {{.*}}):<<1:rnd:sat declare i64 @llvm.hexagon.M2.mmpyh.rs0(i64, i64) define i64 @M2_mmpyh_rs0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyh.rs0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywoh(r1:0, r3:2):rnd:sat +; CHECK: = vmpywoh({{.*}}, {{.*}}):rnd:sat declare i64 @llvm.hexagon.M2.mmpyh.rs1(i64, i64) define i64 @M2_mmpyh_rs1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyh.rs1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywoh(r1:0, r3:2):<<1:rnd:sat +; CHECK: = vmpywoh({{.*}}, {{.*}}):<<1:rnd:sat ; Vector multiply word by unsigned half (32x16) declare i64 @llvm.hexagon.M2.mmpyul.s0(i64, i64) @@ -100,56 +104,56 @@ define i64 @M2_mmpyul_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyul.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):sat +; CHECK: = vmpyweuh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.mmpyul.s1(i64, i64) define i64 @M2_mmpyul_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyul.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):<<1:sat +; CHECK: = vmpyweuh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.mmpyuh.s0(i64, i64) define i64 @M2_mmpyuh_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyuh.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywouh(r1:0, r3:2):sat +; CHECK: = vmpywouh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.mmpyuh.s1(i64, i64) define i64 @M2_mmpyuh_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyuh.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywouh(r1:0, r3:2):<<1:sat +; CHECK: = vmpywouh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.mmpyul.rs0(i64, i64) define i64 @M2_mmpyul_rs0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyul.rs0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):rnd:sat +; CHECK: = vmpyweuh({{.*}}, {{.*}}):rnd:sat declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) define i64 @M2_mmpyul_rs1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):<<1:rnd:sat +; CHECK: = vmpyweuh({{.*}}, {{.*}}):<<1:rnd:sat declare i64 @llvm.hexagon.M2.mmpyuh.rs0(i64, i64) define i64 
@M2_mmpyuh_rs0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyuh.rs0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywouh(r1:0, r3:2):rnd:sat +; CHECK: = vmpywouh({{.*}}, {{.*}}):rnd:sat declare i64 @llvm.hexagon.M2.mmpyuh.rs1(i64, i64) define i64 @M2_mmpyuh_rs1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.mmpyuh.rs1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpywouh(r1:0, r3:2):<<1:rnd:sat +; CHECK: = vmpywouh({{.*}}, {{.*}}):<<1:rnd:sat ; Multiply signed halfwords declare i64 @llvm.hexagon.M2.mpyd.ll.s0(i32, i32) @@ -157,616 +161,616 @@ define i64 @M2_mpyd_ll_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.ll.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.l) +; CHECK: = mpy({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.ll.s1(i32, i32) define i64 @M2_mpyd_ll_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.ll.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.l):<<1 +; CHECK: = mpy({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.lh.s0(i32, i32) define i64 @M2_mpyd_lh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.lh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.h) +; CHECK: = mpy({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.lh.s1(i32, i32) define i64 @M2_mpyd_lh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.lh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.h):<<1 +; CHECK: = mpy({{.*}}.l, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyd.hl.s0(i32, i32) define i64 @M2_mpyd_hl_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.hl.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.l) +; CHECK: = mpy({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.hl.s1(i32, i32) define i64 @M2_mpyd_hl_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.hl.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.l):<<1 +; CHECK: = mpy({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.hh.s0(i32, i32) define i64 @M2_mpyd_hh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.hh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.h) +; CHECK: = mpy({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.hh.s1(i32, i32) define i64 @M2_mpyd_hh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.hh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.h):<<1 +; CHECK: = mpy({{.*}}.h, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyd.rnd.ll.s0(i32, i32) define i64 @M2_mpyd_rnd_ll_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.ll.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.l):rnd +; CHECK: = mpy({{.*}}.l, {{.*}}.l):rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.ll.s1(i32, i32) define i64 @M2_mpyd_rnd_ll_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.ll.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.l):<<1:rnd +; CHECK: = mpy({{.*}}.l, {{.*}}.l):<<1:rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.lh.s0(i32, i32) define i64 @M2_mpyd_rnd_lh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.lh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.h):rnd +; CHECK: = mpy({{.*}}.l, {{.*}}.h):rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.lh.s1(i32, i32) define i64 @M2_mpyd_rnd_lh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.lh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.l, r1.h):<<1:rnd +; CHECK: = mpy({{.*}}.l, {{.*}}.h):<<1:rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.hl.s0(i32, i32) define i64 @M2_mpyd_rnd_hl_s0(i32 %a, i32 %b) { %z = 
call i64 @llvm.hexagon.M2.mpyd.rnd.hl.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.l):rnd +; CHECK: = mpy({{.*}}.h, {{.*}}.l):rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.hl.s1(i32, i32) define i64 @M2_mpyd_rnd_hl_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hl.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.l):<<1:rnd +; CHECK: = mpy({{.*}}.h, {{.*}}.l):<<1:rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.hh.s0(i32, i32) define i64 @M2_mpyd_rnd_hh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.h):rnd +; CHECK: = mpy({{.*}}.h, {{.*}}.h):rnd declare i64 @llvm.hexagon.M2.mpyd.rnd.hh.s1(i32, i32) define i64 @M2_mpyd_rnd_hh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0.h, r1.h):<<1:rnd +; CHECK: = mpy({{.*}}.h, {{.*}}.h):<<1:rnd declare i64 @llvm.hexagon.M2.mpyd.acc.ll.s0(i64, i32, i32) define i64 @M2_mpyd_acc_ll_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.ll.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.l, r3.l) +; CHECK: += mpy({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.acc.ll.s1(i64, i32, i32) define i64 @M2_mpyd_acc_ll_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.ll.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.l, r3.l):<<1 +; CHECK: += mpy({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.acc.lh.s0(i64, i32, i32) define i64 @M2_mpyd_acc_lh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.lh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.l, r3.h) +; CHECK: += mpy({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.acc.lh.s1(i64, i32, i32) define i64 @M2_mpyd_acc_lh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.lh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.l, r3.h):<<1 +; CHECK: += mpy({{.*}}.l, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyd.acc.hl.s0(i64, i32, i32) define i64 @M2_mpyd_acc_hl_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.hl.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.h, r3.l) +; CHECK: += mpy({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.acc.hl.s1(i64, i32, i32) define i64 @M2_mpyd_acc_hl_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.hl.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.h, r3.l):<<1 +; CHECK: += mpy({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.acc.hh.s0(i64, i32, i32) define i64 @M2_mpyd_acc_hh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.hh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.h, r3.h) +; CHECK: += mpy({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.acc.hh.s1(i64, i32, i32) define i64 @M2_mpyd_acc_hh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.acc.hh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2.h, r3.h):<<1 +; CHECK: += mpy({{.*}}.h, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyd.nac.ll.s0(i64, i32, i32) define i64 @M2_mpyd_nac_ll_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.ll.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.l, r3.l) +; CHECK: -= mpy({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.nac.ll.s1(i64, i32, i32) define i64 @M2_mpyd_nac_ll_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.ll.s1(i64 %a, i32 %b, 
i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.l, r3.l):<<1 +; CHECK: -= mpy({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.nac.lh.s0(i64, i32, i32) define i64 @M2_mpyd_nac_lh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.lh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.l, r3.h) +; CHECK: -= mpy({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.nac.lh.s1(i64, i32, i32) define i64 @M2_mpyd_nac_lh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.lh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.l, r3.h):<<1 +; CHECK: -= mpy({{.*}}.l, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyd.nac.hl.s0(i64, i32, i32) define i64 @M2_mpyd_nac_hl_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.hl.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.h, r3.l) +; CHECK: -= mpy({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyd.nac.hl.s1(i64, i32, i32) define i64 @M2_mpyd_nac_hl_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.hl.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.h, r3.l):<<1 +; CHECK: -= mpy({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyd.nac.hh.s0(i64, i32, i32) define i64 @M2_mpyd_nac_hh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.hh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.h, r3.h) +; CHECK: -= mpy({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyd.nac.hh.s1(i64, i32, i32) define i64 @M2_mpyd_nac_hh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyd.nac.hh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2.h, r3.h):<<1 +; CHECK: -= mpy({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) define i32 @M2_mpy_ll_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.ll.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l) +; CHECK: = mpy({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.ll.s1(i32, i32) define i32 @M2_mpy_ll_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.ll.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l):<<1 +; CHECK: = mpy({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.lh.s0(i32, i32) define i32 @M2_mpy_lh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.lh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h) +; CHECK: = mpy({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.lh.s1(i32, i32) define i32 @M2_mpy_lh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.lh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h):<<1 +; CHECK: = mpy({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.hl.s0(i32, i32) define i32 @M2_mpy_hl_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.hl.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l) +; CHECK: = mpy({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.hl.s1(i32, i32) define i32 @M2_mpy_hl_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.hl.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l):<<1 +; CHECK: = mpy({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.hh.s0(i32, i32) define i32 @M2_mpy_hh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.hh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.h) +; CHECK: = mpy({{.*}}.h, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.hh.s1(i32, i32) define i32 @M2_mpy_hh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.hh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, 
r1.h):<<1 +; CHECK: = mpy({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.sat.ll.s0(i32, i32) define i32 @M2_mpy_sat_ll_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.ll.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l):sat +; CHECK: = mpy({{.*}}.l, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32, i32) define i32 @M2_mpy_sat_ll_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l):<<1:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.sat.lh.s0(i32, i32) define i32 @M2_mpy_sat_lh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.lh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h):sat +; CHECK: = mpy({{.*}}.l, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.sat.lh.s1(i32, i32) define i32 @M2_mpy_sat_lh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.lh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h):<<1:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.mpy.sat.hl.s0(i32, i32) define i32 @M2_mpy_sat_hl_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.hl.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l):sat +; CHECK: = mpy({{.*}}.h, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.sat.hl.s1(i32, i32) define i32 @M2_mpy_sat_hl_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.hl.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l):<<1:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.sat.hh.s0(i32, i32) define i32 @M2_mpy_sat_hh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.hh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.h):sat +; CHECK: = mpy({{.*}}.h, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.sat.hh.s1(i32, i32) define i32 @M2_mpy_sat_hh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.hh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.h):<<1:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s0(i32, i32) define i32 @M2_mpy_sat_rnd_ll_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l):rnd:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.l):rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s1(i32, i32) define i32 @M2_mpy_sat_rnd_ll_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.l):<<1:rnd:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.l):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s0(i32, i32) define i32 @M2_mpy_sat_rnd_lh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h):rnd:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.h):rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s1(i32, i32) define i32 @M2_mpy_sat_rnd_lh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.l, r1.h):<<1:rnd:sat +; CHECK: = mpy({{.*}}.l, {{.*}}.h):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s0(i32, i32) define i32 @M2_mpy_sat_rnd_hl_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l):rnd:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.l):rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s1(i32, i32) define i32 @M2_mpy_sat_rnd_hl_s1(i32 %a, i32 %b) { %z = call 
i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.l):<<1:rnd:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.l):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s0(i32, i32) define i32 @M2_mpy_sat_rnd_hh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.h):rnd:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.h):rnd:sat declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s1(i32, i32) define i32 @M2_mpy_sat_rnd_hh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0.h, r1.h):<<1:rnd:sat +; CHECK: = mpy({{.*}}.h, {{.*}}.h):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.acc.ll.s0(i32, i32, i32) define i32 @M2_mpy_acc_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.l) +; CHECK: += mpy({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.acc.ll.s1(i32, i32, i32) define i32 @M2_mpy_acc_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.l):<<1 +; CHECK: += mpy({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.acc.lh.s0(i32, i32, i32) define i32 @M2_mpy_acc_lh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.h) +; CHECK: += mpy({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.acc.lh.s1(i32, i32, i32) define i32 @M2_mpy_acc_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.h):<<1 +; CHECK: += mpy({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.acc.hl.s0(i32, i32, i32) define i32 @M2_mpy_acc_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.l) +; CHECK: += mpy({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.acc.hl.s1(i32, i32, i32) define i32 @M2_mpy_acc_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.l):<<1 +; CHECK: += mpy({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.acc.hh.s0(i32, i32, i32) define i32 @M2_mpy_acc_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.h) +; CHECK: += mpy({{.*}}.h, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.acc.hh.s1(i32, i32, i32) define i32 @M2_mpy_acc_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.h):<<1 +; CHECK: += mpy({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) define i32 @M2_mpy_acc_sat_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.l):sat +; CHECK: += mpy({{.*}}.l, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32, i32, i32) define i32 @M2_mpy_acc_sat_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.l):<<1:sat +; CHECK: += mpy({{.*}}.l, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s0(i32, i32, i32) define i32 @M2_mpy_acc_sat_lh_s0(i32 %a, i32 %b, i32 %c) { %z = 
call i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.h):sat +; CHECK: += mpy({{.*}}.l, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s1(i32, i32, i32) define i32 @M2_mpy_acc_sat_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.l, r2.h):<<1:sat +; CHECK: += mpy({{.*}}.l, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s0(i32, i32, i32) define i32 @M2_mpy_acc_sat_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.l):sat +; CHECK: += mpy({{.*}}.h, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s1(i32, i32, i32) define i32 @M2_mpy_acc_sat_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.l):<<1:sat +; CHECK: += mpy({{.*}}.h, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s0(i32, i32, i32) define i32 @M2_mpy_acc_sat_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.h):sat +; CHECK: += mpy({{.*}}.h, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s1(i32, i32, i32) define i32 @M2_mpy_acc_sat_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1.h, r2.h):<<1:sat +; CHECK: += mpy({{.*}}.h, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.mpy.nac.ll.s0(i32, i32, i32) define i32 @M2_mpy_nac_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.l) +; CHECK: -= mpy({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.nac.ll.s1(i32, i32, i32) define i32 @M2_mpy_nac_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.l):<<1 +; CHECK: -= mpy({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.nac.lh.s0(i32, i32, i32) define i32 @M2_mpy_nac_lh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.h) +; CHECK: -= mpy({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.nac.lh.s1(i32, i32, i32) define i32 @M2_mpy_nac_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.h):<<1 +; CHECK: -= mpy({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.nac.hl.s0(i32, i32, i32) define i32 @M2_mpy_nac_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.l) +; CHECK: -= mpy({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpy.nac.hl.s1(i32, i32, i32) define i32 @M2_mpy_nac_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.l):<<1 +; CHECK: -= mpy({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpy.nac.hh.s0(i32, i32, i32) define i32 @M2_mpy_nac_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.h) +; CHECK: -= mpy({{.*}}.h, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpy.nac.hh.s1(i32, i32, i32) define i32 
@M2_mpy_nac_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.h):<<1 +; CHECK: -= mpy({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s0(i32, i32, i32) define i32 @M2_mpy_nac_sat_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.l):sat +; CHECK: -= mpy({{.*}}.l, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s1(i32, i32, i32) define i32 @M2_mpy_nac_sat_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.l):<<1:sat +; CHECK: -= mpy({{.*}}.l, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s0(i32, i32, i32) define i32 @M2_mpy_nac_sat_lh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.h):sat +; CHECK: -= mpy({{.*}}.l, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s1(i32, i32, i32) define i32 @M2_mpy_nac_sat_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.l, r2.h):<<1:sat +; CHECK: -= mpy({{.*}}.l, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s0(i32, i32, i32) define i32 @M2_mpy_nac_sat_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.l):sat +; CHECK: -= mpy({{.*}}.h, {{.*}}.l):sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s1(i32, i32, i32) define i32 @M2_mpy_nac_sat_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.l):<<1:sat +; CHECK: -= mpy({{.*}}.h, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s0(i32, i32, i32) define i32 @M2_mpy_nac_sat_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.h):sat +; CHECK: -= mpy({{.*}}.h, {{.*}}.h):sat declare i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s1(i32, i32, i32) define i32 @M2_mpy_nac_sat_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1.h, r2.h):<<1:sat +; CHECK: -= mpy({{.*}}.h, {{.*}}.h):<<1:sat ; Multiply unsigned halfwords declare i64 @llvm.hexagon.M2.mpyud.ll.s0(i32, i32) @@ -774,336 +778,336 @@ define i64 @M2_mpyud_ll_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.ll.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.l, r1.l) +; CHECK: = mpyu({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.ll.s1(i32, i32) define i64 @M2_mpyud_ll_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.ll.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.l, r1.l):<<1 +; CHECK: = mpyu({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.lh.s0(i32, i32) define i64 @M2_mpyud_lh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.lh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.l, r1.h) +; CHECK: = mpyu({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.lh.s1(i32, i32) define i64 @M2_mpyud_lh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.lh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.l, r1.h):<<1 +; CHECK: = mpyu({{.*}}.l, {{.*}}.h):<<1 declare i64 
@llvm.hexagon.M2.mpyud.hl.s0(i32, i32) define i64 @M2_mpyud_hl_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.hl.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.h, r1.l) +; CHECK: = mpyu({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.hl.s1(i32, i32) define i64 @M2_mpyud_hl_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.hl.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.h, r1.l):<<1 +; CHECK: = mpyu({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.hh.s0(i32, i32) define i64 @M2_mpyud_hh_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.hh.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.h, r1.h) +; CHECK: = mpyu({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.hh.s1(i32, i32) define i64 @M2_mpyud_hh_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.mpyud.hh.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpyu(r0.h, r1.h):<<1 +; CHECK: = mpyu({{.*}}.h, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyud.acc.ll.s0(i64, i32, i32) define i64 @M2_mpyud_acc_ll_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.ll.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.l, r3.l) +; CHECK: += mpyu({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.acc.ll.s1(i64, i32, i32) define i64 @M2_mpyud_acc_ll_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.ll.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.l, r3.l):<<1 +; CHECK: += mpyu({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.acc.lh.s0(i64, i32, i32) define i64 @M2_mpyud_acc_lh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.lh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.l, r3.h) +; CHECK: += mpyu({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.acc.lh.s1(i64, i32, i32) define i64 @M2_mpyud_acc_lh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.lh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.l, r3.h):<<1 +; CHECK: += mpyu({{.*}}.l, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyud.acc.hl.s0(i64, i32, i32) define i64 @M2_mpyud_acc_hl_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.hl.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.h, r3.l) +; CHECK: += mpyu({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.acc.hl.s1(i64, i32, i32) define i64 @M2_mpyud_acc_hl_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.hl.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.h, r3.l):<<1 +; CHECK: += mpyu({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.acc.hh.s0(i64, i32, i32) define i64 @M2_mpyud_acc_hh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.hh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.h, r3.h) +; CHECK: += mpyu({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.acc.hh.s1(i64, i32, i32) define i64 @M2_mpyud_acc_hh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.acc.hh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2.h, r3.h):<<1 +; CHECK: += mpyu({{.*}}.h, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyud.nac.ll.s0(i64, i32, i32) define i64 @M2_mpyud_nac_ll_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.ll.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.l, r3.l) +; CHECK: -= mpyu({{.*}}.l, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.nac.ll.s1(i64, i32, i32) define i64 @M2_mpyud_nac_ll_s1(i64 
%a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.ll.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.l, r3.l):<<1 +; CHECK: -= mpyu({{.*}}.l, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.nac.lh.s0(i64, i32, i32) define i64 @M2_mpyud_nac_lh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.lh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.l, r3.h) +; CHECK: -= mpyu({{.*}}.l, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.nac.lh.s1(i64, i32, i32) define i64 @M2_mpyud_nac_lh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.lh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.l, r3.h):<<1 +; CHECK: -= mpyu({{.*}}.l, {{.*}}.h):<<1 declare i64 @llvm.hexagon.M2.mpyud.nac.hl.s0(i64, i32, i32) define i64 @M2_mpyud_nac_hl_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.hl.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.h, r3.l) +; CHECK: -= mpyu({{.*}}.h, {{.*}}.l) declare i64 @llvm.hexagon.M2.mpyud.nac.hl.s1(i64, i32, i32) define i64 @M2_mpyud_nac_hl_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.hl.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.h, r3.l):<<1 +; CHECK: -= mpyu({{.*}}.h, {{.*}}.l):<<1 declare i64 @llvm.hexagon.M2.mpyud.nac.hh.s0(i64, i32, i32) define i64 @M2_mpyud_nac_hh_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.hh.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.h, r3.h) +; CHECK: -= mpyu({{.*}}.h, {{.*}}.h) declare i64 @llvm.hexagon.M2.mpyud.nac.hh.s1(i64, i32, i32) define i64 @M2_mpyud_nac_hh_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.mpyud.nac.hh.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2.h, r3.h):<<1 +; CHECK: -= mpyu({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.ll.s0(i32, i32) define i32 @M2_mpyu_ll_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.ll.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.l, r1.l) +; CHECK: = mpyu({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.ll.s1(i32, i32) define i32 @M2_mpyu_ll_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.ll.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.l, r1.l):<<1 +; CHECK: = mpyu({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.lh.s0(i32, i32) define i32 @M2_mpyu_lh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.lh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.l, r1.h) +; CHECK: = mpyu({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpyu.lh.s1(i32, i32) define i32 @M2_mpyu_lh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.lh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.l, r1.h):<<1 +; CHECK: = mpyu({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.hl.s0(i32, i32) define i32 @M2_mpyu_hl_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.hl.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.h, r1.l) +; CHECK: = mpyu({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.hl.s1(i32, i32) define i32 @M2_mpyu_hl_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.hl.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.h, r1.l):<<1 +; CHECK: = mpyu({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.hh.s0(i32, i32) define i32 @M2_mpyu_hh_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.hh.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.h, r1.h) +; CHECK: = mpyu({{.*}}.h, {{.*}}.h) declare i32 
@llvm.hexagon.M2.mpyu.hh.s1(i32, i32) define i32 @M2_mpyu_hh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.hh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0.h, r1.h):<<1 +; CHECK: = mpyu({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.acc.ll.s0(i32, i32, i32) define i32 @M2_mpyu_acc_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.l, r2.l) +; CHECK: += mpyu({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.acc.ll.s1(i32, i32, i32) define i32 @M2_mpyu_acc_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.l, r2.l):<<1 +; CHECK: += mpyu({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.acc.lh.s0(i32, i32, i32) define i32 @M2_mpyu_acc_lh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.l, r2.h) +; CHECK: += mpyu({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpyu.acc.lh.s1(i32, i32, i32) define i32 @M2_mpyu_acc_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.l, r2.h):<<1 +; CHECK: += mpyu({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.acc.hl.s0(i32, i32, i32) define i32 @M2_mpyu_acc_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.h, r2.l) +; CHECK: += mpyu({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.acc.hl.s1(i32, i32, i32) define i32 @M2_mpyu_acc_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.h, r2.l):<<1 +; CHECK: += mpyu({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.acc.hh.s0(i32, i32, i32) define i32 @M2_mpyu_acc_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.h, r2.h) +; CHECK: += mpyu({{.*}}.h, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpyu.acc.hh.s1(i32, i32, i32) define i32 @M2_mpyu_acc_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.acc.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpyu(r1.h, r2.h):<<1 +; CHECK: += mpyu({{.*}}.h, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32, i32, i32) define i32 @M2_mpyu_nac_ll_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.l, r2.l) +; CHECK: -= mpyu({{.*}}.l, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s1(i32, i32, i32) define i32 @M2_mpyu_nac_ll_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.ll.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.l, r2.l):<<1 +; CHECK: -= mpyu({{.*}}.l, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.nac.lh.s0(i32, i32, i32) define i32 @M2_mpyu_nac_lh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.lh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.l, r2.h) +; CHECK: -= mpyu({{.*}}.l, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpyu.nac.lh.s1(i32, i32, i32) define i32 @M2_mpyu_nac_lh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.lh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.l, r2.h):<<1 +; CHECK: -= mpyu({{.*}}.l, {{.*}}.h):<<1 declare i32 @llvm.hexagon.M2.mpyu.nac.hl.s0(i32, 
i32, i32) define i32 @M2_mpyu_nac_hl_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.hl.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.h, r2.l) +; CHECK: -= mpyu({{.*}}.h, {{.*}}.l) declare i32 @llvm.hexagon.M2.mpyu.nac.hl.s1(i32, i32, i32) define i32 @M2_mpyu_nac_hl_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.hl.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.h, r2.l):<<1 +; CHECK: -= mpyu({{.*}}.h, {{.*}}.l):<<1 declare i32 @llvm.hexagon.M2.mpyu.nac.hh.s0(i32, i32, i32) define i32 @M2_mpyu_nac_hh_s0(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.hh.s0(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.h, r2.h) +; CHECK: -= mpyu({{.*}}.h, {{.*}}.h) declare i32 @llvm.hexagon.M2.mpyu.nac.hh.s1(i32, i32, i32) define i32 @M2_mpyu_nac_hh_s1(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M2.mpyu.nac.hh.s1(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpyu(r1.h, r2.h):<<1 +; CHECK: -= mpyu({{.*}}.h, {{.*}}.h):<<1 ; Polynomial multiply words declare i64 @llvm.hexagon.M4.pmpyw(i32, i32) @@ -1111,14 +1115,14 @@ define i64 @M4_pmpyw(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M4.pmpyw(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = pmpyw(r0, r1) +; CHECK: = pmpyw({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M4.pmpyw.acc(i64, i32, i32) define i64 @M4_pmpyw_acc(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M4.pmpyw.acc(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 ^= pmpyw(r2, r3) +; CHECK: ^= pmpyw({{.*}}, {{.*}}) ; Vector reduce multiply word by signed half declare i64 @llvm.hexagon.M4.vrmpyoh.s0(i64, i64) @@ -1126,56 +1130,56 @@ define i64 @M4_vrmpyoh_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M4.vrmpyoh.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpywoh(r1:0, r3:2) +; CHECK: = vrmpywoh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M4.vrmpyoh.s1(i64, i64) define i64 @M4_vrmpyoh_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M4.vrmpyoh.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpywoh(r1:0, r3:2):<<1 +; CHECK: = vrmpywoh({{.*}}, {{.*}}):<<1 declare i64 @llvm.hexagon.M4.vrmpyeh.s0(i64, i64) define i64 @M4_vrmpyeh_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M4.vrmpyeh.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpyweh(r1:0, r3:2) +; CHECK: = vrmpyweh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M4.vrmpyeh.s1(i64, i64) define i64 @M4_vrmpyeh_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M4.vrmpyeh.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpyweh(r1:0, r3:2):<<1 +; CHECK: = vrmpyweh({{.*}}, {{.*}}):<<1 declare i64 @llvm.hexagon.M4.vrmpyoh.acc.s0(i64, i64, i64) define i64 @M4_vrmpyoh_acc_s0(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M4.vrmpyoh.acc.s0(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpywoh(r3:2, r5:4) +; CHECK: += vrmpywoh({{.*}}, r5:4) declare i64 @llvm.hexagon.M4.vrmpyoh.acc.s1(i64, i64, i64) define i64 @M4_vrmpyoh_acc_s1(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M4.vrmpyoh.acc.s1(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpywoh(r3:2, r5:4):<<1 +; CHECK: += vrmpywoh({{.*}}, r5:4):<<1 declare i64 @llvm.hexagon.M4.vrmpyeh.acc.s0(i64, i64, i64) define i64 @M4_vrmpyeh_acc_s0(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M4.vrmpyeh.acc.s0(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpyweh(r3:2, r5:4) +; CHECK: += vrmpyweh({{.*}}, r5:4) declare i64 @llvm.hexagon.M4.vrmpyeh.acc.s1(i64, i64, i64) define i64 @M4_vrmpyeh_acc_s1(i64 %a, i64 %b, i64 %c) { %z = 
call i64 @llvm.hexagon.M4.vrmpyeh.acc.s1(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpyweh(r3:2, r5:4):<<1 +; CHECK: += vrmpyweh({{.*}}, r5:4):<<1 ; Multiply and use upper result declare i32 @llvm.hexagon.M2.dpmpyss.rnd.s0(i32, i32) @@ -1183,84 +1187,84 @@ define i32 @M2_dpmpyss_rnd_s0(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.dpmpyss.rnd.s0(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1):rnd +; CHECK: = mpy({{.*}}, {{.*}}):rnd declare i32 @llvm.hexagon.M2.mpyu.up(i32, i32) define i32 @M2_mpyu_up(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpyu.up(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpyu(r0, r1) +; CHECK: = mpyu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M2.mpysu.up(i32, i32) define i32 @M2_mpysu_up(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpysu.up(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpysu(r0, r1) +; CHECK: = mpysu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M2.hmmpyh.s1(i32, i32) define i32 @M2_hmmpyh_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.hmmpyh.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1.h):<<1:sat +; CHECK: = mpy({{.*}}, {{.*}}.h):<<1:sat declare i32 @llvm.hexagon.M2.hmmpyl.s1(i32, i32) define i32 @M2_hmmpyl_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.hmmpyl.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1.l):<<1:sat +; CHECK: = mpy({{.*}}, {{.*}}.l):<<1:sat declare i32 @llvm.hexagon.M2.hmmpyh.rs1(i32, i32) define i32 @M2_hmmpyh_rs1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.hmmpyh.rs1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1.h):<<1:rnd:sat +; CHECK: = mpy({{.*}}, {{.*}}.h):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.up.s1.sat(i32, i32) define i32 @M2_mpy_up_s1_sat(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.up.s1.sat(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1):<<1:sat +; CHECK: = mpy({{.*}}, {{.*}}):<<1:sat declare i32 @llvm.hexagon.M2.hmmpyl.rs1(i32, i32) define i32 @M2_hmmpyl_rs1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.hmmpyl.rs1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1.l):<<1:rnd:sat +; CHECK: = mpy({{.*}}, {{.*}}.l):<<1:rnd:sat declare i32 @llvm.hexagon.M2.mpy.up(i32, i32) define i32 @M2_mpy_up(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.up(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1) +; CHECK: = mpy({{.*}}, {{.*}}) declare i32 @llvm.hexagon.M2.mpy.up.s1(i32, i32) define i32 @M2_mpy_up_s1(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.M2.mpy.up.s1(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = mpy(r0, r1):<<1 +; CHECK: = mpy({{.*}}, {{.*}}):<<1 declare i32 @llvm.hexagon.M4.mac.up.s1.sat(i32, i32, i32) define i32 @M4_mac_up_s1_sat(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.mac.up.s1.sat(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += mpy(r1, r2):<<1:sat +; CHECK: += mpy({{.*}}, {{.*}}):<<1:sat declare i32 @llvm.hexagon.M4.nac.up.s1.sat(i32, i32, i32) define i32 @M4_nac_up_s1_sat(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.M4.nac.up.s1.sat(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= mpy(r1, r2):<<1:sat +; CHECK: -= mpy({{.*}}, {{.*}}):<<1:sat ; Multiply and use full result declare i64 @llvm.hexagon.M2.dpmpyss.s0(i32, i32) @@ -1268,42 +1272,42 @@ define i64 @M2_dpmpyss_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = mpy(r0, r1) +; CHECK: = mpy({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.dpmpyuu.s0(i32, i32) define i64 @M2_dpmpyuu_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.dpmpyuu.s0(i32 %a, i32 %b) 
ret i64 %z } -; CHECK: r1:0 = mpyu(r0, r1) +; CHECK: = mpyu({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.dpmpyss.acc.s0(i64, i32, i32) define i64 @M2_dpmpyss_acc_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.dpmpyss.acc.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpy(r2, r3) +; CHECK: += mpy({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32) define i64 @M2_dpmpyss_nac_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpy(r2, r3) +; CHECK: -= mpy({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.dpmpyuu.acc.s0(i64, i32, i32) define i64 @M2_dpmpyuu_acc_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.dpmpyuu.acc.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += mpyu(r2, r3) +; CHECK: += mpyu({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.dpmpyuu.nac.s0(i64, i32, i32) define i64 @M2_dpmpyuu_nac_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.dpmpyuu.nac.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= mpyu(r2, r3) +; CHECK: -= mpyu({{.*}}, {{.*}}) ; Vector dual multiply declare i64 @llvm.hexagon.M2.vdmpys.s0(i64, i64) @@ -1311,14 +1315,14 @@ define i64 @M2_vdmpys_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vdmpys.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vdmpy(r1:0, r3:2):sat +; CHECK: = vdmpy({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vdmpys.s1(i64, i64) define i64 @M2_vdmpys_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vdmpys.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vdmpy(r1:0, r3:2):<<1:sat +; CHECK: = vdmpy({{.*}}, {{.*}}):<<1:sat ; Vector reduce multiply bytes declare i64 @llvm.hexagon.M5.vrmpybuu(i64, i64) @@ -1326,28 +1330,28 @@ define i64 @M5_vrmpybuu(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M5.vrmpybuu(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpybu(r1:0, r3:2) +; CHECK: = vrmpybu({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M5.vrmpybsu(i64, i64) define i64 @M5_vrmpybsu(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M5.vrmpybsu(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vrmpybsu(r1:0, r3:2) +; CHECK: = vrmpybsu({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M5.vrmacbuu(i64, i64, i64) define i64 @M5_vrmacbuu(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M5.vrmacbuu(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpybu(r3:2, r5:4) +; CHECK: += vrmpybu({{.*}}, r5:4) declare i64 @llvm.hexagon.M5.vrmacbsu(i64, i64, i64) define i64 @M5_vrmacbsu(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M5.vrmacbsu(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vrmpybsu(r3:2, r5:4) +; CHECK: += vrmpybsu({{.*}}, r5:4) ; Vector dual multiply signed by unsigned bytes declare i64 @llvm.hexagon.M5.vdmpybsu(i64, i64) @@ -1355,14 +1359,14 @@ define i64 @M5_vdmpybsu(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M5.vdmpybsu(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vdmpybsu(r1:0, r3:2):sat +; CHECK: = vdmpybsu({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M5.vdmacbsu(i64, i64, i64) define i64 @M5_vdmacbsu(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M5.vdmacbsu(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vdmpybsu(r3:2, r5:4):sat +; CHECK: += vdmpybsu({{.*}}, r5:4):sat ; Vector multiply even halfwords declare i64 @llvm.hexagon.M2.vmpy2es.s0(i64, i64) @@ -1370,35 +1374,35 @@ define i64 @M2_vmpy2es_s0(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2es.s0(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyeh(r1:0, r3:2):sat +; CHECK: = vmpyeh({{.*}}, 
{{.*}}):sat declare i64 @llvm.hexagon.M2.vmpy2es.s1(i64, i64) define i64 @M2_vmpy2es_s1(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2es.s1(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = vmpyeh(r1:0, r3:2):<<1:sat +; CHECK: = vmpyeh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.vmac2es(i64, i64, i64) define i64 @M2_vmac2es(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vmac2es(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vmpyeh(r3:2, r5:4) +; CHECK: += vmpyeh({{.*}}, r5:4) declare i64 @llvm.hexagon.M2.vmac2es.s0(i64, i64, i64) define i64 @M2_vmac2es_s0(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vmac2es.s0(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vmpyeh(r3:2, r5:4):sat +; CHECK: += vmpyeh({{.*}}, r5:4):sat declare i64 @llvm.hexagon.M2.vmac2es.s1(i64, i64, i64) define i64 @M2_vmac2es_s1(i64 %a, i64 %b, i64 %c) { %z = call i64 @llvm.hexagon.M2.vmac2es.s1(i64 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: r1:0 += vmpyeh(r3:2, r5:4):<<1:sat +; CHECK: += vmpyeh({{.*}}, r5:4):<<1:sat ; Vector multiply halfwords declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) @@ -1406,35 +1410,35 @@ define i64 @M2_vmpy2s_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vmpyh(r0, r1):sat +; CHECK: = vmpyh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vmpy2s.s1(i32, i32) define i64 @M2_vmpy2s_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2s.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vmpyh(r0, r1):<<1:sat +; CHECK: = vmpyh({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.vmac2(i64, i32, i32) define i64 @M2_vmac2(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.vmac2(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += vmpyh(r2, r3) +; CHECK: += vmpyh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) define i64 @M2_vmac2s_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += vmpyh(r2, r3):sat +; CHECK: += vmpyh({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vmac2s.s1(i64, i32, i32) define i64 @M2_vmac2s_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.vmac2s.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += vmpyh(r2, r3):<<1:sat +; CHECK: += vmpyh({{.*}}, {{.*}}):<<1:sat ; Vector multiply halfwords signed by unsigned declare i64 @llvm.hexagon.M2.vmpy2su.s0(i32, i32) @@ -1442,28 +1446,28 @@ define i64 @M2_vmpy2su_s0(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2su.s0(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vmpyhsu(r0, r1):sat +; CHECK: = vmpyhsu({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vmpy2su.s1(i32, i32) define i64 @M2_vmpy2su_s1(i32 %a, i32 %b) { %z = call i64 @llvm.hexagon.M2.vmpy2su.s1(i32 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = vmpyhsu(r0, r1):<<1:sat +; CHECK: = vmpyhsu({{.*}}, {{.*}}):<<1:sat declare i64 @llvm.hexagon.M2.vmac2su.s0(i64, i32, i32) define i64 @M2_vmac2su_s0(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.vmac2su.s0(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += vmpyhsu(r2, r3):sat +; CHECK: += vmpyhsu({{.*}}, {{.*}}):sat declare i64 @llvm.hexagon.M2.vmac2su.s1(i64, i32, i32) define i64 @M2_vmac2su_s1(i64 %a, i32 %b, i32 %c) { %z = call i64 @llvm.hexagon.M2.vmac2su.s1(i64 %a, i32 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += vmpyhsu(r2, r3):<<1:sat +; CHECK: += vmpyhsu({{.*}}, {{.*}}):<<1:sat ; Vector reduce multiply halfwords declare i64 @llvm.hexagon.M2.vrmpy.s0(i64, i64) @@ -1471,14 +1475,14 @@ 
define i64 @M2_vrmpy_s0(i64 %a, i64 %b) {
  %z = call i64 @llvm.hexagon.M2.vrmpy.s0(i64 %a, i64 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vrmpyh(r1:0, r3:2)
+; CHECK: = vrmpyh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.M2.vrmac.s0(i64, i64, i64)
define i64 @M2_vrmac_s0(i64 %a, i64 %b, i64 %c) {
  %z = call i64 @llvm.hexagon.M2.vrmac.s0(i64 %a, i64 %b, i64 %c)
  ret i64 %z
}
-; CHECK: r1:0 += vrmpyh(r3:2, r5:4)
+; CHECK: += vrmpyh({{.*}}, r5:4)

; Vector multiply bytes
declare i64 @llvm.hexagon.M5.vmpybsu(i32, i32)
@@ -1486,28 +1490,28 @@ define i64 @M2_vmpybsu(i32 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.M5.vmpybsu(i32 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vmpybsu(r0, r1)
+; CHECK: = vmpybsu({{.*}}, {{.*}})

declare i64 @llvm.hexagon.M5.vmpybuu(i32, i32)
define i64 @M2_vmpybuu(i32 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.M5.vmpybuu(i32 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vmpybu(r0, r1)
+; CHECK: = vmpybu({{.*}}, {{.*}})

declare i64 @llvm.hexagon.M5.vmacbuu(i64, i32, i32)
define i64 @M2_vmacbuu(i64 %a, i32 %b, i32 %c) {
  %z = call i64 @llvm.hexagon.M5.vmacbuu(i64 %a, i32 %b, i32 %c)
  ret i64 %z
}
-; CHECK: r1:0 += vmpybu(r2, r3)
+; CHECK: += vmpybu({{.*}}, {{.*}})

declare i64 @llvm.hexagon.M5.vmacbsu(i64, i32, i32)
define i64 @M2_vmacbsu(i64 %a, i32 %b, i32 %c) {
  %z = call i64 @llvm.hexagon.M5.vmacbsu(i64 %a, i32 %b, i32 %c)
  ret i64 %z
}
-; CHECK: r1:0 += vmpybsu(r2, r3)
+; CHECK: += vmpybsu({{.*}}, {{.*}})

; Vector polynomial multiply halfwords
declare i64 @llvm.hexagon.M4.vpmpyh(i32, i32)
@@ -1515,11 +1519,11 @@ define i64 @M4_vpmpyh(i32 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.M4.vpmpyh(i32 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vpmpyh(r0, r1)
+; CHECK: = vpmpyh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.M4.vpmpyh.acc(i64, i32, i32)
define i64 @M4_vpmpyh_acc(i64 %a, i32 %b, i32 %c) {
  %z = call i64 @llvm.hexagon.M4.vpmpyh.acc(i64 %a, i32 %b, i32 %c)
  ret i64 %z
}
-; CHECK: r1:0 ^= vpmpyh(r2, r3)
+; CHECK: ^= vpmpyh({{.*}}, {{.*}})
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll b/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll
index 0b761323e31e..3e044e3838de 100644
--- a/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll
@@ -1,41 +1,44 @@
; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s
; Hexagon Programmer's Reference Manual 11.10.6 XTYPE/PERM

+; CHECK-CALL-NOT: call
+
; Saturate
declare i32 @llvm.hexagon.A2.sat(i64)
define i32 @A2_sat(i64 %a) {
  %z = call i32 @llvm.hexagon.A2.sat(i64 %a)
  ret i32 %z
}
-; CHECK: r0 = sat(r1:0)
+; CHECK: = sat({{.*}})

declare i32 @llvm.hexagon.A2.sath(i32)
define i32 @A2_sath(i32 %a) {
  %z = call i32 @llvm.hexagon.A2.sath(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = sath(r0)
+; CHECK: = sath({{.*}})

declare i32 @llvm.hexagon.A2.satuh(i32)
define i32 @A2_satuh(i32 %a) {
  %z = call i32 @llvm.hexagon.A2.satuh(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = satuh(r0)
+; CHECK: = satuh({{.*}})

declare i32 @llvm.hexagon.A2.satub(i32)
define i32 @A2_satub(i32 %a) {
  %z = call i32 @llvm.hexagon.A2.satub(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = satub(r0)
+; CHECK: = satub({{.*}})

declare i32 @llvm.hexagon.A2.satb(i32)
define i32 @A2_satb(i32 %a) {
  %z = call i32 @llvm.hexagon.A2.satb(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = satb(r0)
+; CHECK: = satb({{.*}})

; Swizzle bytes
declare i32 @llvm.hexagon.A2.swiz(i32)
@@ -43,7 +46,7 @@ define i32 @A2_swiz(i32 %a) {
  %z = call i32 @llvm.hexagon.A2.swiz(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = swiz(r0)
+; CHECK: = swiz({{.*}})

; Vector
round and pack declare i32 @llvm.hexagon.S2.vrndpackwh(i64) @@ -51,14 +54,14 @@ define i32 @S2_vrndpackwh(i64 %a) { %z = call i32 @llvm.hexagon.S2.vrndpackwh(i64 %a) ret i32 %z } -; CHECK: r0 = vrndwh(r1:0) +; CHECK: = vrndwh({{.*}}) declare i32 @llvm.hexagon.S2.vrndpackwhs(i64) define i32 @S2_vrndpackwhs(i64 %a) { %z = call i32 @llvm.hexagon.S2.vrndpackwhs(i64 %a) ret i32 %z } -; CHECK: r0 = vrndwh(r1:0):sat +; CHECK: = vrndwh({{.*}}):sat ; Vector saturate and pack declare i32 @llvm.hexagon.S2.vsathub(i64) @@ -66,42 +69,42 @@ define i32 @S2_vsathub(i64 %a) { %z = call i32 @llvm.hexagon.S2.vsathub(i64 %a) ret i32 %z } -; CHECK: r0 = vsathub(r1:0) +; CHECK: = vsathub({{.*}}) declare i32 @llvm.hexagon.S2.vsatwh(i64) define i32 @S2_vsatwh(i64 %a) { %z = call i32 @llvm.hexagon.S2.vsatwh(i64 %a) ret i32 %z } -; CHECK: r0 = vsatwh(r1:0) +; CHECK: = vsatwh({{.*}}) declare i32 @llvm.hexagon.S2.vsatwuh(i64) define i32 @S2_vsatwuh(i64 %a) { %z = call i32 @llvm.hexagon.S2.vsatwuh(i64 %a) ret i32 %z } -; CHECK: r0 = vsatwuh(r1:0) +; CHECK: = vsatwuh({{.*}}) declare i32 @llvm.hexagon.S2.vsathb(i64) define i32 @S2_vsathb(i64 %a) { %z = call i32 @llvm.hexagon.S2.vsathb(i64 %a) ret i32 %z } -; CHECK: r0 = vsathb(r1:0) +; CHECK: = vsathb({{.*}}) declare i32 @llvm.hexagon.S2.svsathb(i32) define i32 @S2_svsathb(i32 %a) { %z = call i32 @llvm.hexagon.S2.svsathb(i32 %a) ret i32 %z } -; CHECK: r0 = vsathb(r0) +; CHECK: = vsathb({{.*}}) declare i32 @llvm.hexagon.S2.svsathub(i32) define i32 @S2_svsathub(i32 %a) { %z = call i32 @llvm.hexagon.S2.svsathub(i32 %a) ret i32 %z } -; CHECK: r0 = vsathub(r0) +; CHECK: = vsathub({{.*}}) ; Vector saturate without pack declare i64 @llvm.hexagon.S2.vsathub.nopack(i64) @@ -109,28 +112,28 @@ define i64 @S2_vsathub_nopack(i64 %a) { %z = call i64 @llvm.hexagon.S2.vsathub.nopack(i64 %a) ret i64 %z } -; CHECK: r1:0 = vsathub(r1:0) +; CHECK: = vsathub({{.*}}) declare i64 @llvm.hexagon.S2.vsatwuh.nopack(i64) define i64 @S2_vsatwuh_nopack(i64 %a) { %z = call i64 @llvm.hexagon.S2.vsatwuh.nopack(i64 %a) ret i64 %z } -; CHECK: r1:0 = vsatwuh(r1:0) +; CHECK: = vsatwuh({{.*}}) declare i64 @llvm.hexagon.S2.vsatwh.nopack(i64) define i64 @S2_vsatwh_nopack(i64 %a) { %z = call i64 @llvm.hexagon.S2.vsatwh.nopack(i64 %a) ret i64 %z } -; CHECK: r1:0 = vsatwh(r1:0) +; CHECK: = vsatwh({{.*}}) declare i64 @llvm.hexagon.S2.vsathb.nopack(i64) define i64 @S2_vsathb_nopack(i64 %a) { %z = call i64 @llvm.hexagon.S2.vsathb.nopack(i64 %a) ret i64 %z } -; CHECK: r1:0 = vsathb(r1:0) +; CHECK: = vsathb({{.*}}) ; Vector shuffle declare i64 @llvm.hexagon.S2.shuffeb(i64, i64) @@ -138,28 +141,28 @@ define i64 @S2_shuffeb(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.shuffeb(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = shuffeb(r1:0, r3:2) +; CHECK: = shuffeb({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.shuffob(i64, i64) define i64 @S2_shuffob(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.shuffob(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = shuffob(r1:0, r3:2) +; CHECK: = shuffob({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.shuffeh(i64, i64) define i64 @S2_shuffeh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.shuffeh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = shuffeh(r1:0, r3:2) +; CHECK: = shuffeh({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.shuffoh(i64, i64) define i64 @S2_shuffoh(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.shuffoh(i64 %a, i64 %b) ret i64 %z } -; CHECK: r1:0 = shuffoh(r1:0, r3:2) +; CHECK: = shuffoh({{.*}}, {{.*}}) ; Vector splat bytes declare i32 @llvm.hexagon.S2.vsplatrb(i32) @@ 
-167,7 +170,7 @@ define i32 @S2_vsplatrb(i32 %a) {
  %z = call i32 @llvm.hexagon.S2.vsplatrb(i32 %a)
  ret i32 %z
}
-; CHECK: r0 = vsplatb(r0)
+; CHECK: = vsplatb({{.*}})

; Vector splat halfwords
declare i64 @llvm.hexagon.S2.vsplatrh(i32)
@@ -175,7 +178,7 @@ define i64 @S2_vsplatrh(i32 %a) {
  %z = call i64 @llvm.hexagon.S2.vsplatrh(i32 %a)
  ret i64 %z
}
-; CHECK: = vsplath(r0)
+; CHECK: = vsplath({{.*}})

; Vector splice
declare i64 @llvm.hexagon.S2.vspliceib(i64, i64, i32)
@@ -183,14 +186,14 @@ define i64 @S2_vspliceib(i64 %a, i64 %b) {
  %z = call i64 @llvm.hexagon.S2.vspliceib(i64 %a, i64 %b, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vspliceb(r1:0, r3:2, #0)
+; CHECK: = vspliceb({{.*}}, {{.*}}, #0)

declare i64 @llvm.hexagon.S2.vsplicerb(i64, i64, i32)
define i64 @S2_vsplicerb(i64 %a, i64 %b, i32 %c) {
  %z = call i64 @llvm.hexagon.S2.vsplicerb(i64 %a, i64 %b, i32 %c)
  ret i64 %z
}
-; CHECK: r1:0 = vspliceb(r1:0, r3:2, p0)
+; CHECK: = vspliceb({{.*}}, {{.*}}, {{.*}})

; Vector sign extend
declare i64 @llvm.hexagon.S2.vsxtbh(i32)
@@ -198,14 +201,14 @@ define i64 @S2_vsxtbh(i32 %a) {
  %z = call i64 @llvm.hexagon.S2.vsxtbh(i32 %a)
  ret i64 %z
}
-; CHECK: = vsxtbh(r0)
+; CHECK: = vsxtbh({{.*}})

declare i64 @llvm.hexagon.S2.vsxthw(i32)
define i64 @S2_vsxthw(i32 %a) {
  %z = call i64 @llvm.hexagon.S2.vsxthw(i32 %a)
  ret i64 %z
}
-; CHECK: = vsxthw(r0)
+; CHECK: = vsxthw({{.*}})

; Vector truncate
declare i32 @llvm.hexagon.S2.vtrunohb(i64)
@@ -213,28 +216,28 @@ define i32 @S2_vtrunohb(i64 %a) {
  %z = call i32 @llvm.hexagon.S2.vtrunohb(i64 %a)
  ret i32 %z
}
-; CHECK: r0 = vtrunohb(r1:0)
+; CHECK: = vtrunohb({{.*}})

declare i32 @llvm.hexagon.S2.vtrunehb(i64)
define i32 @S2_vtrunehb(i64 %a) {
  %z = call i32 @llvm.hexagon.S2.vtrunehb(i64 %a)
  ret i32 %z
}
-; CHECK: r0 = vtrunehb(r1:0)
+; CHECK: = vtrunehb({{.*}})

declare i64 @llvm.hexagon.S2.vtrunowh(i64, i64)
define i64 @S2_vtrunowh(i64 %a, i64 %b) {
  %z = call i64 @llvm.hexagon.S2.vtrunowh(i64 %a, i64 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vtrunowh(r1:0, r3:2)
+; CHECK: = vtrunowh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.S2.vtrunewh(i64, i64)
define i64 @S2_vtrunewh(i64 %a, i64 %b) {
  %z = call i64 @llvm.hexagon.S2.vtrunewh(i64 %a, i64 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vtrunewh(r1:0, r3:2)
+; CHECK: = vtrunewh({{.*}}, {{.*}})

; Vector zero extend
declare i64 @llvm.hexagon.S2.vzxtbh(i32)
@@ -242,11 +245,11 @@ define i64 @S2_vzxtbh(i32 %a) {
  %z = call i64 @llvm.hexagon.S2.vzxtbh(i32 %a)
  ret i64 %z
}
-; CHECK: = vzxtbh(r0)
+; CHECK: = vzxtbh({{.*}})

declare i64 @llvm.hexagon.S2.vzxthw(i32)
define i64 @S2_vzxthw(i32 %a) {
  %z = call i64 @llvm.hexagon.S2.vzxthw(i32 %a)
  ret i64 %z
}
-; CHECK: = vzxthw(r0)
+; CHECK: = vzxthw({{.*}})
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll b/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll
index 96e63d8d7790..f06339b9a85a 100644
--- a/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll
@@ -1,48 +1,51 @@
; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s
; Hexagon Programmer's Reference Manual 11.10.7 XTYPE/PRED

+; CHECK-CALL-NOT: call
+
; Compare byte
declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32)
define i32 @A4_cmpbgt(i32 %a, i32 %b) {
  %z = call i32 @llvm.hexagon.A4.cmpbgt(i32 %a, i32 %b)
  ret i32 %z
}
-; CHECK: p0 = cmpb.gt(r0, r1)
+; CHECK: = cmpb.gt({{.*}}, {{.*}})

declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32)
define i32 @A4_cmpbeq(i32 %a, i32 %b) {
  %z = call i32 @llvm.hexagon.A4.cmpbeq(i32 %a, i32 %b)
  ret i32 %z
}
-;
CHECK: p0 = cmpb.eq(r0, r1) +; CHECK: = cmpb.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32) define i32 @A4_cmpbgtu(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.cmpbgtu(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = cmpb.gtu(r0, r1) +; CHECK: = cmpb.gtu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32) define i32 @A4_cmpbgti(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmpbgti(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmpb.gt(r0, #0) +; CHECK: = cmpb.gt({{.*}}, #0) declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32) define i32 @A4_cmpbeqi(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmpbeqi(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmpb.eq(r0, #0) +; CHECK: = cmpb.eq({{.*}}, #0) declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32) define i32 @A4_cmpbgtui(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmpbgtui(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmpb.gtu(r0, #0) +; CHECK: = cmpb.gtu({{.*}}, #0) ; Compare half declare i32 @llvm.hexagon.A4.cmphgt(i32, i32) @@ -50,42 +53,42 @@ define i32 @A4_cmphgt(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.cmphgt(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = cmph.gt(r0, r1) +; CHECK: = cmph.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.cmpheq(i32, i32) define i32 @A4_cmpheq(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.cmpheq(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = cmph.eq(r0, r1) +; CHECK: = cmph.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32) define i32 @A4_cmphgtu(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.cmphgtu(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = cmph.gtu(r0, r1) +; CHECK: = cmph.gtu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.cmphgti(i32, i32) define i32 @A4_cmphgti(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmphgti(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmph.gt(r0, #0) +; CHECK: = cmph.gt({{.*}}, #0) declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32) define i32 @A4_cmpheqi(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmpheqi(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmph.eq(r0, #0) +; CHECK: = cmph.eq({{.*}}, #0) declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32) define i32 @A4_cmphgtui(i32 %a) { %z = call i32 @llvm.hexagon.A4.cmphgtui(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = cmph.gtu(r0, #0) +; CHECK: = cmph.gtu({{.*}}, #0) ; Compare doublewords declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64) @@ -93,21 +96,21 @@ define i32 @C2_cmpgtp(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.C2.cmpgtp(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = cmp.gt(r1:0, r3:2) +; CHECK: = cmp.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64) define i32 @C2_cmpeqp(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.C2.cmpeqp(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = cmp.eq(r1:0, r3:2) +; CHECK: = cmp.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64) define i32 @C2_cmpgtup(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.C2.cmpgtup(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = cmp.gtu(r1:0, r3:2) +; CHECK: = cmp.gtu({{.*}}, {{.*}}) ; Compare bitmask declare i32 @llvm.hexagon.C2.bitsclri(i32, i32) @@ -115,42 +118,42 @@ define i32 @C2_bitsclri(i32 %a) { %z = call i32 @llvm.hexagon.C2.bitsclri(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = bitsclr(r0, #0) +; CHECK: = bitsclr({{.*}}, #0) declare i32 @llvm.hexagon.C4.nbitsclri(i32, i32) define i32 @C4_nbitsclri(i32 %a) { %z = call i32 @llvm.hexagon.C4.nbitsclri(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = !bitsclr(r0, #0) +; CHECK: = !bitsclr({{.*}}, #0) declare i32 @llvm.hexagon.C2.bitsset(i32, i32) define i32 @C2_bitsset(i32 %a, i32 %b) { %z = call i32 
@llvm.hexagon.C2.bitsset(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = bitsset(r0, r1) +; CHECK: = bitsset({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.nbitsset(i32, i32) define i32 @C4_nbitsset(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C4.nbitsset(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = !bitsset(r0, r1) +; CHECK: = !bitsset({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C2.bitsclr(i32, i32) define i32 @C2_bitsclr(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C2.bitsclr(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = bitsclr(r0, r1) +; CHECK: = bitsclr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.C4.nbitsclr(i32, i32) define i32 @C4_nbitsclr(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C4.nbitsclr(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = !bitsclr(r0, r1) +; CHECK: = !bitsclr({{.*}}, {{.*}}) ; Mask generate from predicate declare i64 @llvm.hexagon.C2.mask(i32) @@ -158,7 +161,7 @@ define i64 @C2_mask(i32 %a) { %z = call i64 @llvm.hexagon.C2.mask(i32 %a) ret i64 %z } -; CHECK: = mask(p0) +; CHECK: = mask({{.*}}) ; Check for TLB match declare i32 @llvm.hexagon.A4.tlbmatch(i64, i32) @@ -166,7 +169,7 @@ define i32 @A4_tlbmatch(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.A4.tlbmatch(i64 %a, i32 %b) ret i32 %z } -; CHECK: p0 = tlbmatch(r1:0, r2) +; CHECK: = tlbmatch({{.*}}, {{.*}}) ; Test bit declare i32 @llvm.hexagon.S2.tstbit.i(i32, i32) @@ -174,28 +177,28 @@ define i32 @S2_tstbit_i(i32 %a) { %z = call i32 @llvm.hexagon.S2.tstbit.i(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = tstbit(r0, #0) +; CHECK: = tstbit({{.*}}, #0) declare i32 @llvm.hexagon.S4.ntstbit.i(i32, i32) define i32 @S4_ntstbit_i(i32 %a) { %z = call i32 @llvm.hexagon.S4.ntstbit.i(i32 %a, i32 0) ret i32 %z } -; CHECK: p0 = !tstbit(r0, #0) +; CHECK: = !tstbit({{.*}}, #0) declare i32 @llvm.hexagon.S2.tstbit.r(i32, i32) define i32 @S2_tstbit_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.tstbit.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = tstbit(r0, r1) +; CHECK: = tstbit({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S4.ntstbit.r(i32, i32) define i32 @S4_ntstbit_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S4.ntstbit.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: p0 = !tstbit(r0, r1) +; CHECK: = !tstbit({{.*}}, {{.*}}) ; Vector compare halfwords declare i32 @llvm.hexagon.A2.vcmpheq(i64, i64) @@ -203,42 +206,42 @@ define i32 @A2_vcmpheq(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpheq(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmph.eq(r1:0, r3:2) +; CHECK: = vcmph.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.vcmphgt(i64, i64) define i32 @A2_vcmphgt(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmphgt(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmph.gt(r1:0, r3:2) +; CHECK: = vcmph.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.vcmphgtu(i64, i64) define i32 @A2_vcmphgtu(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmphgtu(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmph.gtu(r1:0, r3:2) +; CHECK: = vcmph.gtu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.vcmpheqi(i64, i32) define i32 @A4_vcmpheqi(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpheqi(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmph.eq(r1:0, #0) +; CHECK: = vcmph.eq({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmphgti(i64, i32) define i32 @A4_vcmphgti(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmphgti(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmph.gt(r1:0, #0) +; CHECK: = vcmph.gt({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmphgtui(i64, i32) define i32 @A4_vcmphgtui(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmphgtui(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = 
vcmph.gtu(r1:0, #0) +; CHECK: = vcmph.gtu({{.*}}, #0) ; Vector compare bytes for any match declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64) @@ -246,7 +249,7 @@ define i32 @A4_vcmpbeq_any(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2)) +; CHECK: = any8(vcmpb.eq({{.*}}, {{.*}})) ; Vector compare bytes declare i32 @llvm.hexagon.A2.vcmpbeq(i64, i64) @@ -254,42 +257,42 @@ define i32 @A2_vcmpbeq(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpbeq(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpb.eq(r1:0, r3:2) +; CHECK: = vcmpb.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.vcmpbgtu(i64, i64) define i32 @A2_vcmpbgtu(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpbgtu(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpb.gtu(r1:0, r3:2) +; CHECK: = vcmpb.gtu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.vcmpbgt(i64, i64) define i32 @A4_vcmpbgt(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A4.vcmpbgt(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpb.gt(r1:0, r3:2) +; CHECK: = vcmpb.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.vcmpbeqi(i64, i32) define i32 @A4_vcmpbeqi(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpbeqi(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpb.eq(r1:0, #0) +; CHECK: = vcmpb.eq({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmpbgti(i64, i32) define i32 @A4_vcmpbgti(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpbgti(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpb.gt(r1:0, #0) +; CHECK: = vcmpb.gt({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmpbgtui(i64, i32) define i32 @A4_vcmpbgtui(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpbgtui(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpb.gtu(r1:0, #0) +; CHECK: = vcmpb.gtu({{.*}}, #0) ; Vector compare words declare i32 @llvm.hexagon.A2.vcmpweq(i64, i64) @@ -297,42 +300,42 @@ define i32 @A2_vcmpweq(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpweq(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpw.eq(r1:0, r3:2) +; CHECK: = vcmpw.eq({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.vcmpwgt(i64, i64) define i32 @A2_vcmpwgt(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpwgt(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpw.gt(r1:0, r3:2) +; CHECK: = vcmpw.gt({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A2.vcmpwgtu(i64, i64) define i32 @A2_vcmpwgtu(i64 %a, i64 %b) { %z = call i32 @llvm.hexagon.A2.vcmpwgtu(i64 %a, i64 %b) ret i32 %z } -; CHECK: p0 = vcmpw.gtu(r1:0, r3:2) +; CHECK: = vcmpw.gtu({{.*}}, {{.*}}) declare i32 @llvm.hexagon.A4.vcmpweqi(i64, i32) define i32 @A4_vcmpweqi(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpweqi(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpw.eq(r1:0, #0) +; CHECK: = vcmpw.eq({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmpwgti(i64, i32) define i32 @A4_vcmpwgti(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpwgti(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpw.gt(r1:0, #0) +; CHECK: = vcmpw.gt({{.*}}, #0) declare i32 @llvm.hexagon.A4.vcmpwgtui(i64, i32) define i32 @A4_vcmpwgtui(i64 %a) { %z = call i32 @llvm.hexagon.A4.vcmpwgtui(i64 %a, i32 0) ret i32 %z } -; CHECK: p0 = vcmpw.gtu(r1:0, #0) +; CHECK: = vcmpw.gtu({{.*}}, #0) ; Viterbi pack even and odd predicate bitsclr declare i32 @llvm.hexagon.C2.vitpack(i32, i32) @@ -340,7 +343,7 @@ define i32 @C2_vitpack(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.C2.vitpack(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vitpack(p1, p0) +; CHECK: = vitpack({{.*}}, {{.*}}) ; Vector mux declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) @@ -348,4 +351,4 @@ define i64 @C2_vmux(i32 %a, i64 
%b, i64 %c) { %z = call i64 @llvm.hexagon.C2.vmux(i32 %a, i64 %b, i64 %c) ret i64 %z } -; CHECK: = vmux(p0, r3:2, r5:4) +; CHECK: = vmux({{.*}}, {{.*}}, {{.*}}) diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll b/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll index c84999bf94fd..1a65f44c1954 100644 --- a/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll +++ b/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll @@ -1,48 +1,51 @@ ; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 < %s | FileCheck -check-prefix=CHECK-CALL %s ; Hexagon Programmer's Reference Manual 11.10.8 XTYPE/SHIFT +; CHECK-CALL-NOT: call + ; Shift by immediate declare i64 @llvm.hexagon.S2.asr.i.p(i64, i32) define i64 @S2_asr_i_p(i64 %a) { %z = call i64 @llvm.hexagon.S2.asr.i.p(i64 %a, i32 0) ret i64 %z } -; CHECK: r1:0 = asr(r1:0, #0) +; CHECK: = asr({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p(i64, i32) define i64 @S2_lsr_i_p(i64 %a) { %z = call i64 @llvm.hexagon.S2.lsr.i.p(i64 %a, i32 0) ret i64 %z } -; CHECK: r1:0 = lsr(r1:0, #0) +; CHECK: = lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p(i64, i32) define i64 @S2_asl_i_p(i64 %a) { %z = call i64 @llvm.hexagon.S2.asl.i.p(i64 %a, i32 0) ret i64 %z } -; CHECK: r1:0 = asl(r1:0, #0) +; CHECK: = asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.i.r(i32, i32) define i32 @S2_asr_i_r(i32 %a) { %z = call i32 @llvm.hexagon.S2.asr.i.r(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = asr(r0, #0) +; CHECK: = asr({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r(i32, i32) define i32 @S2_lsr_i_r(i32 %a) { %z = call i32 @llvm.hexagon.S2.lsr.i.r(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = lsr(r0, #0) +; CHECK: = lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r(i32, i32) define i32 @S2_asl_i_r(i32 %a) { %z = call i32 @llvm.hexagon.S2.asl.i.r(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = asl(r0, #0) +; CHECK: = asl({{.*}}, #0) ; Shift by immediate and accumulate declare i64 @llvm.hexagon.S2.asr.i.p.nac(i64, i64, i32) @@ -50,84 +53,84 @@ define i64 @S2_asr_i_p_nac(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asr.i.p.nac(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 -= asr(r3:2, #0) +; CHECK: -= asr({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p.nac(i64, i64, i32) define i64 @S2_lsr_i_p_nac(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lsr.i.p.nac(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 -= lsr(r3:2, #0) +; CHECK: -= lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p.nac(i64, i64, i32) define i64 @S2_asl_i_p_nac(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asl.i.p.nac(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 -= asl(r3:2, #0) +; CHECK: -= asl({{.*}}, #0) declare i64 @llvm.hexagon.S2.asr.i.p.acc(i64, i64, i32) define i64 @S2_asr_i_p_acc(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asr.i.p.acc(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 += asr(r3:2, #0) +; CHECK: += asr({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p.acc(i64, i64, i32) define i64 @S2_lsr_i_p_acc(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lsr.i.p.acc(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 += lsr(r3:2, #0) +; CHECK: += lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p.acc(i64, i64, i32) define i64 @S2_asl_i_p_acc(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asl.i.p.acc(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 += asl(r3:2, #0) +; CHECK: += asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.i.r.nac(i32, i32, i32) define i32 @S2_asr_i_r_nac(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.i.r.nac(i32 %a, i32 %b, 
i32 0) ret i32 %z } -; CHECK: r0 -= asr(r1, #0) +; CHECK: -= asr({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r.nac(i32, i32, i32) define i32 @S2_lsr_i_r_nac(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.i.r.nac(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 -= lsr(r1, #0) +; CHECK: -= lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r.nac(i32, i32, i32) define i32 @S2_asl_i_r_nac(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.i.r.nac(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 -= asl(r1, #0) +; CHECK: -= asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.i.r.acc(i32, i32, i32) define i32 @S2_asr_i_r_acc(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.i.r.acc(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 += asr(r1, #0) +; CHECK: += asr({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r.acc(i32, i32, i32) define i32 @S2_lsr_i_r_acc(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.i.r.acc(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 += lsr(r1, #0) +; CHECK: += lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r.acc(i32, i32, i32) define i32 @S2_asl_i_r_acc(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.i.r.acc(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 += asl(r1, #0) +; CHECK: += asl({{.*}}, #0) ; Shift by immediate and add declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32) @@ -135,35 +138,35 @@ define i32 @S4_addi_asl_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.addi.asl.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(#0, asl(r0, #0)) +; CHECK: = add(#0, asl({{.*}}, #0)) declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32) define i32 @S4_subi_asl_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.subi.asl.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = sub(#0, asl(r0, #0)) +; CHECK: = sub(#0, asl({{.*}}, #0)) declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32) define i32 @S4_addi_lsr_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = add(#0, lsr(r0, #0)) +; CHECK: = add(#0, lsr({{.*}}, #0)) declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) define i32 @S4_subi_lsr_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = sub(#0, lsr(r0, #0)) +; CHECK: = sub(#0, lsr({{.*}}, #0)) declare i32 @llvm.hexagon.S2.addasl.rrri(i32, i32, i32) define i32 @S2_addasl_rrri(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.addasl.rrri(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 = addasl(r0, r1, #0) +; CHECK: = addasl({{.*}}, {{.*}}, #0) ; Shift by immediate and logical declare i64 @llvm.hexagon.S2.asr.i.p.and(i64, i64, i32) @@ -171,140 +174,140 @@ define i64 @S2_asr_i_p_and(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asr.i.p.and(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 &= asr(r3:2, #0) +; CHECK: &= asr({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p.and(i64, i64, i32) define i64 @S2_lsr_i_p_and(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lsr.i.p.and(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 &= lsr(r3:2, #0) +; CHECK: {{.*}} &= lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p.and(i64, i64, i32) define i64 @S2_asl_i_p_and(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asl.i.p.and(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 &= asl(r3:2, #0) +; CHECK: &= asl({{.*}}, #0) declare i64 @llvm.hexagon.S2.asr.i.p.or(i64, i64, i32) define i64 @S2_asr_i_p_or(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asr.i.p.or(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 |= asr(r3:2, #0) +; CHECK: |= 
asr({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p.or(i64, i64, i32) define i64 @S2_lsr_i_p_or(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lsr.i.p.or(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 |= lsr(r3:2, #0) +; CHECK: |= lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p.or(i64, i64, i32) define i64 @S2_asl_i_p_or(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asl.i.p.or(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 |= asl(r3:2, #0) +; CHECK: |= asl({{.*}}, #0) declare i64 @llvm.hexagon.S2.lsr.i.p.xacc(i64, i64, i32) define i64 @S2_lsr_i_p_xacc(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.lsr.i.p.xacc(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 ^= lsr(r3:2, #0) +; CHECK: ^= lsr({{.*}}, #0) declare i64 @llvm.hexagon.S2.asl.i.p.xacc(i64, i64, i32) define i64 @S2_asl_i_p_xacc(i64 %a, i64 %b) { %z = call i64 @llvm.hexagon.S2.asl.i.p.xacc(i64 %a, i64 %b, i32 0) ret i64 %z } -; CHECK: r1:0 ^= asl(r3:2, #0) +; CHECK: ^= asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.i.r.and(i32, i32, i32) define i32 @S2_asr_i_r_and(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.i.r.and(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 &= asr(r1, #0) +; CHECK: &= asr({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r.and(i32, i32, i32) define i32 @S2_lsr_i_r_and(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.i.r.and(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 &= lsr(r1, #0) +; CHECK: &= lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r.and(i32, i32, i32) define i32 @S2_asl_i_r_and(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.i.r.and(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 &= asl(r1, #0) +; CHECK: &= asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.i.r.or(i32, i32, i32) define i32 @S2_asr_i_r_or(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.i.r.or(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 |= asr(r1, #0) +; CHECK: |= asr({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r.or(i32, i32, i32) define i32 @S2_lsr_i_r_or(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.i.r.or(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 |= lsr(r1, #0) +; CHECK: |= lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r.or(i32, i32, i32) define i32 @S2_asl_i_r_or(i32%a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.i.r.or(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 |= asl(r1, #0) +; CHECK: |= asl({{.*}}, #0) declare i32 @llvm.hexagon.S2.lsr.i.r.xacc(i32, i32, i32) define i32 @S2_lsr_i_r_xacc(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.i.r.xacc(i32%a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 ^= lsr(r1, #0) +; CHECK: ^= lsr({{.*}}, #0) declare i32 @llvm.hexagon.S2.asl.i.r.xacc(i32, i32, i32) define i32 @S2_asl_i_r_xacc(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.i.r.xacc(i32 %a, i32 %b, i32 0) ret i32 %z } -; CHECK: r0 ^= asl(r1, #0) +; CHECK: ^= asl({{.*}}, #0) declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32) define i32 @S4_andi_asl_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.andi.asl.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = and(#0, asl(r0, #0)) +; CHECK: = and(#0, asl({{.*}}, #0)) declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32) define i32 @S4_ori_asl_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.ori.asl.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = or(#0, asl(r0, #0)) +; CHECK: = or(#0, asl({{.*}}, #0)) declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32) define i32 @S4_andi_lsr_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = and(#0, 
lsr(r0, #0)) +; CHECK: = and(#0, lsr({{.*}}, #0)) declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32) define i32 @S4_ori_lsr_ri(i32 %a) { %z = call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 0, i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = or(#0, lsr(r0, #0)) +; CHECK: = or(#0, lsr({{.*}}, #0)) ; Shift right by immediate with rounding declare i64 @llvm.hexagon.S2.asr.i.p.rnd(i64, i32) @@ -312,14 +315,14 @@ define i64 @S2_asr_i_p_rnd(i64 %a) { %z = call i64 @llvm.hexagon.S2.asr.i.p.rnd(i64 %a, i32 0) ret i64 %z } -; CHECK: r1:0 = asr(r1:0, #0):rnd +; CHECK: = asr({{.*}}, #0):rnd declare i32 @llvm.hexagon.S2.asr.i.r.rnd(i32, i32) define i32 @S2_asr_i_r_rnd(i32 %a) { %z = call i32 @llvm.hexagon.S2.asr.i.r.rnd(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = asr(r0, #0):rnd +; CHECK: = asr({{.*}}, #0):rnd ; Shift left by immediate with saturation declare i32 @llvm.hexagon.S2.asl.i.r.sat(i32, i32) @@ -327,7 +330,7 @@ define i32 @S2_asl_i_r_sat(i32 %a) { %z = call i32 @llvm.hexagon.S2.asl.i.r.sat(i32 %a, i32 0) ret i32 %z } -; CHECK: r0 = asl(r0, #0):sat +; CHECK: = asl({{.*}}, #0):sat ; Shift by register declare i64 @llvm.hexagon.S2.asr.r.p(i64, i32) @@ -335,63 +338,63 @@ define i64 @S2_asr_r_p(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.asr.r.p(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = asr(r1:0, r2) +; CHECK: = asr({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.lsr.r.p(i64, i32) define i64 @S2_lsr_r_p(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.lsr.r.p(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = lsr(r1:0, r2) +; CHECK: = lsr({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.asl.r.p(i64, i32) define i64 @S2_asl_r_p(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.asl.r.p(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = asl(r1:0, r2) +; CHECK: = asl({{.*}}, {{.*}}) declare i64 @llvm.hexagon.S2.lsl.r.p(i64, i32) define i64 @S2_lsl_r_p(i64 %a, i32 %b) { %z = call i64 @llvm.hexagon.S2.lsl.r.p(i64 %a, i32 %b) ret i64 %z } -; CHECK: r1:0 = lsl(r1:0, r2) +; CHECK: = lsl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asr.r.r(i32, i32) define i32 @S2_asr_r_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.r.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = asr(r0, r1) +; CHECK: = asr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsr.r.r(i32, i32) define i32 @S2_lsr_r_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsr.r.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = lsr(r0, r1) +; CHECK: = lsr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asl.r.r(i32, i32) define i32 @S2_asl_r_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asl.r.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = asl(r0, r1) +; CHECK: = asl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsl.r.r(i32, i32) define i32 @S2_lsl_r_r(i32 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.lsl.r.r(i32 %a, i32 %b) ret i32 %z } -; CHECK: r0 = lsl(r0, r1) +; CHECK: = lsl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S4.lsli(i32, i32) define i32 @S4_lsli(i32 %a) { %z = call i32 @llvm.hexagon.S4.lsli(i32 0, i32 %a) ret i32 %z } -; CHECK: r0 = lsl(#0, r0) +; CHECK: = lsl(#0, {{.*}}) ; Shift by register and accumulate declare i64 @llvm.hexagon.S2.asr.r.p.nac(i64, i64, i32) @@ -399,112 +402,112 @@ define i64 @S2_asr_r_p_nac(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asr.r.p.nac(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= asr(r3:2, r4) +; CHECK: -= asr({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsr.r.p.nac(i64, i64, i32) define i64 @S2_lsr_r_p_nac(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsr.r.p.nac(i64 %a, i64 %b, i32 %c) ret i64 %z } 
-; CHECK: r1:0 -= lsr(r3:2, r4) +; CHECK: -= lsr({{.*}}, r4) declare i64 @llvm.hexagon.S2.asl.r.p.nac(i64, i64, i32) define i64 @S2_asl_r_p_nac(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asl.r.p.nac(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= asl(r3:2, r4) +; CHECK: -= asl({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsl.r.p.nac(i64, i64, i32) define i64 @S2_lsl_r_p_nac(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsl.r.p.nac(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 -= lsl(r3:2, r4) +; CHECK: -= lsl({{.*}}, r4) declare i64 @llvm.hexagon.S2.asr.r.p.acc(i64, i64, i32) define i64 @S2_asr_r_p_acc(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asr.r.p.acc(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += asr(r3:2, r4) +; CHECK: += asr({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsr.r.p.acc(i64, i64, i32) define i64 @S2_lsr_r_p_acc(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsr.r.p.acc(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += lsr(r3:2, r4) +; CHECK: += lsr({{.*}}, r4) declare i64 @llvm.hexagon.S2.asl.r.p.acc(i64, i64, i32) define i64 @S2_asl_r_p_acc(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asl.r.p.acc(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += asl(r3:2, r4) +; CHECK: += asl({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsl.r.p.acc(i64, i64, i32) define i64 @S2_lsl_r_p_acc(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsl.r.p.acc(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 += lsl(r3:2, r4) +; CHECK: += lsl({{.*}}, r4) declare i32 @llvm.hexagon.S2.asr.r.r.nac(i32, i32, i32) define i32 @S2_asr_r_r_nac(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asr.r.r.nac(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= asr(r1, r2) +; CHECK: -= asr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsr.r.r.nac(i32, i32, i32) define i32 @S2_lsr_r_r_nac(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsr.r.r.nac(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= lsr(r1, r2) +; CHECK: -= lsr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asl.r.r.nac(i32, i32, i32) define i32 @S2_asl_r_r_nac(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asl.r.r.nac(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= asl(r1, r2) +; CHECK: -= asl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsl.r.r.nac(i32, i32, i32) define i32 @S2_lsl_r_r_nac(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsl.r.r.nac(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 -= lsl(r1, r2) +; CHECK: -= lsl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asr.r.r.acc(i32, i32, i32) define i32 @S2_asr_r_r_acc(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asr.r.r.acc(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += asr(r1, r2) +; CHECK: += asr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsr.r.r.acc(i32, i32, i32) define i32 @S2_lsr_r_r_acc(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsr.r.r.acc(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += lsr(r1, r2) +; CHECK: += lsr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asl.r.r.acc(i32, i32, i32) define i32 @S2_asl_r_r_acc(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asl.r.r.acc(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += asl(r1, r2) +; CHECK: += asl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsl.r.r.acc(i32, i32, i32) define i32 @S2_lsl_r_r_acc(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsl.r.r.acc(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 += lsl(r1, r2) +; CHECK: += lsl({{.*}}, 
{{.*}}) ; Shift by register and logical declare i64 @llvm.hexagon.S2.asr.r.p.or(i64, i64, i32) @@ -512,112 +515,112 @@ define i64 @S2_asr_r_p_or(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asr.r.p.or(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 |= asr(r3:2, r4) +; CHECK: |= asr({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsr.r.p.or(i64, i64, i32) define i64 @S2_lsr_r_p_or(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsr.r.p.or(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 |= lsr(r3:2, r4) +; CHECK: |= lsr({{.*}}, r4) declare i64 @llvm.hexagon.S2.asl.r.p.or(i64, i64, i32) define i64 @S2_asl_r_p_or(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asl.r.p.or(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 |= asl(r3:2, r4) +; CHECK: |= asl({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsl.r.p.or(i64, i64, i32) define i64 @S2_lsl_r_p_or(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsl.r.p.or(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 |= lsl(r3:2, r4) +; CHECK: |= lsl({{.*}}, r4) declare i64 @llvm.hexagon.S2.asr.r.p.and(i64, i64, i32) define i64 @S2_asr_r_p_and(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asr.r.p.and(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 &= asr(r3:2, r4) +; CHECK: &= asr({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsr.r.p.and(i64, i64, i32) define i64 @S2_lsr_r_p_and(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsr.r.p.and(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 &= lsr(r3:2, r4) +; CHECK: &= lsr({{.*}}, r4) declare i64 @llvm.hexagon.S2.asl.r.p.and(i64, i64, i32) define i64 @S2_asl_r_p_and(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.asl.r.p.and(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 &= asl(r3:2, r4) +; CHECK: &= asl({{.*}}, r4) declare i64 @llvm.hexagon.S2.lsl.r.p.and(i64, i64, i32) define i64 @S2_lsl_r_p_and(i64 %a, i64 %b, i32 %c) { %z = call i64 @llvm.hexagon.S2.lsl.r.p.and(i64 %a, i64 %b, i32 %c) ret i64 %z } -; CHECK: r1:0 &= lsl(r3:2, r4) +; CHECK: &= lsl({{.*}}, r4) declare i32 @llvm.hexagon.S2.asr.r.r.or(i32, i32, i32) define i32 @S2_asr_r_r_or(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asr.r.r.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= asr(r1, r2) +; CHECK: |= asr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsr.r.r.or(i32, i32, i32) define i32 @S2_lsr_r_r_or(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsr.r.r.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= lsr(r1, r2) +; CHECK: |= lsr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asl.r.r.or(i32, i32, i32) define i32 @S2_asl_r_r_or(i32%a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asl.r.r.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= asl(r1, r2) +; CHECK: |= asl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsl.r.r.or(i32, i32, i32) define i32 @S2_lsl_r_r_or(i32%a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsl.r.r.or(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 |= lsl(r1, r2) +; CHECK: |= lsl({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.asr.r.r.and(i32, i32, i32) define i32 @S2_asr_r_r_and(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.asr.r.r.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= asr(r1, r2) +; CHECK: &= asr({{.*}}, {{.*}}) declare i32 @llvm.hexagon.S2.lsr.r.r.and(i32, i32, i32) define i32 @S2_lsr_r_r_and(i32 %a, i32 %b, i32 %c) { %z = call i32 @llvm.hexagon.S2.lsr.r.r.and(i32 %a, i32 %b, i32 %c) ret i32 %z } -; CHECK: r0 &= lsr(r1, r2) +; CHECK: &= lsr({{.*}}, {{.*}}) declare i32 
@llvm.hexagon.S2.asl.r.r.and(i32, i32, i32)
define i32 @S2_asl_r_r_and(i32 %a, i32 %b, i32 %c) {
  %z = call i32 @llvm.hexagon.S2.asl.r.r.and(i32 %a, i32 %b, i32 %c)
  ret i32 %z
}
-; CHECK: r0 &= asl(r1, r2)
+; CHECK: &= asl({{.*}}, {{.*}})

declare i32 @llvm.hexagon.S2.lsl.r.r.and(i32, i32, i32)
define i32 @S2_lsl_r_r_and(i32 %a, i32 %b, i32 %c) {
  %z = call i32 @llvm.hexagon.S2.lsl.r.r.and(i32 %a, i32 %b, i32 %c)
  ret i32 %z
}
-; CHECK: r0 &= lsl(r1, r2)
+; CHECK: &= lsl({{.*}}, {{.*}})

; Shift by register with saturation
declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32)
@@ -625,14 +628,14 @@ define i32 @S2_asr_r_r_sat(i32 %a, i32 %b) {
  %z = call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %a, i32 %b)
  ret i32 %z
}
-; CHECK: r0 = asr(r0, r1):sat
+; CHECK: = asr({{.*}}, {{.*}}):sat

declare i32 @llvm.hexagon.S2.asl.r.r.sat(i32, i32)
define i32 @S2_asl_r_r_sat(i32 %a, i32 %b) {
  %z = call i32 @llvm.hexagon.S2.asl.r.r.sat(i32 %a, i32 %b)
  ret i32 %z
}
-; CHECK: r0 = asl(r0, r1):sat
+; CHECK: = asl({{.*}}, {{.*}}):sat

; Vector shift halfwords by immediate
declare i64 @llvm.hexagon.S2.asr.i.vh(i64, i32)
@@ -640,21 +643,21 @@ define i64 @S2_asr_i_vh(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.asr.i.vh(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vasrh(r1:0, #0)
+; CHECK: = vasrh({{.*}}, #0)

declare i64 @llvm.hexagon.S2.lsr.i.vh(i64, i32)
define i64 @S2_lsr_i_vh(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.lsr.i.vh(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vlsrh(r1:0, #0)
+; CHECK: = vlsrh({{.*}}, #0)

declare i64 @llvm.hexagon.S2.asl.i.vh(i64, i32)
define i64 @S2_asl_i_vh(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.asl.i.vh(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vaslh(r1:0, #0)
+; CHECK: = vaslh({{.*}}, #0)

; Vector shift halfwords by register
declare i64 @llvm.hexagon.S2.asr.r.vh(i64, i32)
@@ -662,28 +665,28 @@ define i64 @S2_asr_r_vh(i64 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.S2.asr.r.vh(i64 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vasrh(r1:0, r2)
+; CHECK: = vasrh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.S2.lsr.r.vh(i64, i32)
define i64 @S2_lsr_r_vh(i64 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.S2.lsr.r.vh(i64 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vlsrh(r1:0, r2)
+; CHECK: = vlsrh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.S2.asl.r.vh(i64, i32)
define i64 @S2_asl_r_vh(i64 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.S2.asl.r.vh(i64 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vaslh(r1:0, r2)
+; CHECK: = vaslh({{.*}}, {{.*}})

declare i64 @llvm.hexagon.S2.lsl.r.vh(i64, i32)
define i64 @S2_lsl_r_vh(i64 %a, i32 %b) {
  %z = call i64 @llvm.hexagon.S2.lsl.r.vh(i64 %a, i32 %b)
  ret i64 %z
}
-; CHECK: r1:0 = vlslh(r1:0, r2)
+; CHECK: = vlslh({{.*}}, {{.*}})

; Vector shift words by immediate
declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32)
@@ -691,21 +694,21 @@ define i64 @S2_asr_i_vw(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.asr.i.vw(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vasrw(r1:0, #0)
+; CHECK: = vasrw({{.*}}, #0)

declare i64 @llvm.hexagon.S2.lsr.i.vw(i64, i32)
define i64 @S2_lsr_i_vw(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.lsr.i.vw(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vlsrw(r1:0, #0)
+; CHECK: = vlsrw({{.*}}, #0)

declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32)
define i64 @S2_asl_i_vw(i64 %a) {
  %z = call i64 @llvm.hexagon.S2.asl.i.vw(i64 %a, i32 0)
  ret i64 %z
}
-; CHECK: r1:0 = vaslw(r1:0, #0)
+; CHECK: = vaslw({{.*}}, #0)

; Vector shift words with truncate and pack
declare i32 @llvm.hexagon.S2.asr.i.svw.trun(i64, i32)
@@ -713,11 +716,11 @@ define i32 @S2_asr_i_svw_trun(i64 %a) {
  %z = call i32
@llvm.hexagon.S2.asr.i.svw.trun(i64 %a, i32 0) ret i32 %z } -; CHECK: r0 = vasrw(r1:0, #0) +; CHECK: = vasrw({{.*}}, #0) declare i32 @llvm.hexagon.S2.asr.r.svw.trun(i64, i32) define i32 @S2_asr_r_svw_trun(i64 %a, i32 %b) { %z = call i32 @llvm.hexagon.S2.asr.r.svw.trun(i64 %a, i32 %b) ret i32 %z } -; CHECK: r0 = vasrw(r1:0, r2) +; CHECK: = vasrw({{.*}}, {{.*}}) diff --git a/test/CodeGen/Hexagon/loadi1-G0.ll b/test/CodeGen/Hexagon/loadi1-G0.ll new file mode 100644 index 000000000000..1116341c92ba --- /dev/null +++ b/test/CodeGen/Hexagon/loadi1-G0.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-small-data-threshold=0 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + + +@flag = external global i1 + + +; CHECK-NOT: CONST + +define i32 @test_sextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = sext i1 %0 to i32 + ret i32 %1 +} + + + +define i16 @test_zextloadi1_16() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i16 + ret i16 %1 +} + + +define i32 @test_zextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + + +define i64 @test_zextloadi1_64() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i64 + ret i64 %1 +} + + diff --git a/test/CodeGen/Hexagon/loadi1-v4-G0.ll b/test/CodeGen/Hexagon/loadi1-v4-G0.ll new file mode 100644 index 000000000000..b7df1a125fb0 --- /dev/null +++ b/test/CodeGen/Hexagon/loadi1-v4-G0.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -hexagon-small-data-threshold=0 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + + +@flag = external global i1 + + +; CHECK-NOT: CONST + +define i32 @test_sextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = sext i1 %0 to i32 + ret i32 %1 +} + + + +define i16 @test_zextloadi1_16() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i16 + ret i16 %1 +} + + +define i32 @test_zextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + + +define i64 @test_zextloadi1_64() { +entry: + %0 = load i1, i1* @flag, align 4 + %1 = zext i1 %0 to i64 + ret i64 %1 +} + + diff --git a/test/CodeGen/Hexagon/loadi1-v4.ll b/test/CodeGen/Hexagon/loadi1-v4.ll new file mode 100644 index 000000000000..15b056040a42 --- /dev/null +++ b/test/CodeGen/Hexagon/loadi1-v4.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + + +@flag = external global i1 + + +define i32 @test_sextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = sext i1 %0 to i32 + ret i32 %1 +} + + + +define i16 @test_zextloadi1_16() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i16 + ret i16 %1 +} + + +define i32 @test_zextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i32 + ret i32 %1 +} + + +define i64 @test_zextloadi1_64() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i64 + ret i64 %1 +} + + diff --git a/test/CodeGen/Hexagon/loadi1.ll b/test/CodeGen/Hexagon/loadi1.ll new file mode 100644 index 000000000000..38c1dfec8329 --- /dev/null +++ 
b/test/CodeGen/Hexagon/loadi1.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + + +@flag = external global i1 + + +define i32 @test_sextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = sext i1 %0 to i32 + ret i32 %1 +} + + + +define i16 @test_zextloadi1_16() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i16 + ret i16 %1 +} + + +define i32 @test_zextloadi1_32() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i32 + ret i32 %1 +} + + +define i64 @test_zextloadi1_64() { +entry: + %0 = load i1, i1* @flag, align 4 +; CHECK: memub + %1 = zext i1 %0 to i64 + ret i64 %1 +} + + diff --git a/test/CodeGen/Hexagon/maxd.ll b/test/CodeGen/Hexagon/maxd.ll new file mode 100644 index 000000000000..7f237fd54e7a --- /dev/null +++ b/test/CodeGen/Hexagon/maxd.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: max + +define i64 @f(i64 %src, i64 %maxval) nounwind readnone { +entry: + %cmp = icmp slt i64 %maxval, %src + %cond = select i1 %cmp, i64 %src, i64 %maxval + ret i64 %cond +} diff --git a/test/CodeGen/Hexagon/maxh.ll b/test/CodeGen/Hexagon/maxh.ll new file mode 100644 index 000000000000..79b5e922c1bb --- /dev/null +++ b/test/CodeGen/Hexagon/maxh.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; The result of max(half-word, half-word) is also half-word. +; Check that we are not producing a sign extend after the max. +; CHECK-NOT: sxth + +define i64 @test_cast(i64 %arg0, i16 zeroext %arg1, i16 zeroext %arg2) nounwind readnone { +entry: + %conv.i = zext i16 %arg1 to i32 + %conv1.i = zext i16 %arg2 to i32 + %sub.i = sub nsw i32 %conv.i, %conv1.i + %sext.i = shl i32 %sub.i, 16 + %cmp.i = icmp slt i32 %sext.i, 65536 + %0 = ashr exact i32 %sext.i, 16 + %conv7.i = select i1 %cmp.i, i32 1, i32 %0 + %cmp8.i = icmp sgt i32 %conv7.i, 4 + %conv7.op.i = add i32 %conv7.i, 65535 + %shl = shl i64 %arg0, 2 + %.mask = and i32 %conv7.op.i, 65535 + %1 = zext i32 %.mask to i64 + %conv = select i1 %cmp8.i, i64 3, i64 %1 + %or = or i64 %conv, %shl + ret i64 %or +} diff --git a/test/CodeGen/Hexagon/maxud.ll b/test/CodeGen/Hexagon/maxud.ll new file mode 100644 index 000000000000..eca4faee602c --- /dev/null +++ b/test/CodeGen/Hexagon/maxud.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: maxu + +define i64 @f(i64 %src, i64 %maxval) nounwind readnone { +entry: + %cmp = icmp ult i64 %maxval, %src + %cond = select i1 %cmp, i64 %src, i64 %maxval + ret i64 %cond +} diff --git a/test/CodeGen/Hexagon/maxuw.ll b/test/CodeGen/Hexagon/maxuw.ll new file mode 100644 index 000000000000..0dba1f5acdef --- /dev/null +++ b/test/CodeGen/Hexagon/maxuw.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: maxu + +define i32 @f(i32 %src, i32 %maxval) nounwind readnone { +entry: + %cmp = icmp ult i32 %maxval, %src + %cond = select i1 %cmp, i32 %src, i32 %maxval + ret i32 %cond +} diff --git a/test/CodeGen/Hexagon/maxw.ll b/test/CodeGen/Hexagon/maxw.ll new file mode 100644 index 000000000000..e66ca958806f --- /dev/null +++ b/test/CodeGen/Hexagon/maxw.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: max + +define i32 @f(i32 %src, i32 %maxval) nounwind readnone { +entry: + %cmp = icmp slt i32 %maxval, %src + %cond = select i1 %cmp, i32 %src, i32 %maxval + ret 
i32 %cond +} diff --git a/test/CodeGen/Hexagon/mind.ll b/test/CodeGen/Hexagon/mind.ll new file mode 100644 index 000000000000..610283d97e2b --- /dev/null +++ b/test/CodeGen/Hexagon/mind.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: min + +define i64 @f(i64 %src, i64 %maxval) nounwind readnone { +entry: + %cmp = icmp sgt i64 %maxval, %src + %cond = select i1 %cmp, i64 %src, i64 %maxval + ret i64 %cond +} diff --git a/test/CodeGen/Hexagon/minu-zext-16.ll b/test/CodeGen/Hexagon/minu-zext-16.ll new file mode 100644 index 000000000000..e27507da3d44 --- /dev/null +++ b/test/CodeGen/Hexagon/minu-zext-16.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: minu + +define zeroext i16 @f(i16* noalias nocapture %src) nounwind readonly { +entry: + %arrayidx = getelementptr inbounds i16, i16* %src, i32 1 + %0 = load i16, i16* %arrayidx, align 1 + %cmp = icmp ult i16 %0, 32767 + %. = select i1 %cmp, i16 %0, i16 32767 + ret i16 %. +} diff --git a/test/CodeGen/Hexagon/minu-zext-8.ll b/test/CodeGen/Hexagon/minu-zext-8.ll new file mode 100644 index 000000000000..15dc1a164912 --- /dev/null +++ b/test/CodeGen/Hexagon/minu-zext-8.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: minu + +define zeroext i8 @f(i8* noalias nocapture %src) nounwind readonly { +entry: + %arrayidx = getelementptr inbounds i8, i8* %src, i32 1 + %0 = load i8, i8* %arrayidx, align 1 + %cmp = icmp ult i8 %0, 127 + %. = select i1 %cmp, i8 %0, i8 127 + ret i8 %. +} diff --git a/test/CodeGen/Hexagon/minud.ll b/test/CodeGen/Hexagon/minud.ll new file mode 100644 index 000000000000..29e81005081a --- /dev/null +++ b/test/CodeGen/Hexagon/minud.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: minu + +define i64 @f(i64 %src, i64 %maxval) nounwind readnone { +entry: + %cmp = icmp ugt i64 %maxval, %src + %cond = select i1 %cmp, i64 %src, i64 %maxval + ret i64 %cond +} diff --git a/test/CodeGen/Hexagon/minuw.ll b/test/CodeGen/Hexagon/minuw.ll new file mode 100644 index 000000000000..a88d1e116037 --- /dev/null +++ b/test/CodeGen/Hexagon/minuw.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: minu + +define i32 @f(i32 %src, i32 %maxval) nounwind readnone { +entry: + %cmp = icmp ugt i32 %maxval, %src + %cond = select i1 %cmp, i32 %src, i32 %maxval + ret i32 %cond +} diff --git a/test/CodeGen/Hexagon/minw.ll b/test/CodeGen/Hexagon/minw.ll new file mode 100644 index 000000000000..5bfaae09c805 --- /dev/null +++ b/test/CodeGen/Hexagon/minw.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: min + +define i32 @f(i32 %src, i32 %maxval) nounwind readnone { +entry: + %cmp = icmp sgt i32 %maxval, %src + %cond = select i1 %cmp, i32 %src, i32 %maxval + ret i32 %cond +} diff --git a/test/CodeGen/Hexagon/postinc-offset.ll b/test/CodeGen/Hexagon/postinc-offset.ll new file mode 100644 index 000000000000..5e0f4751f305 --- /dev/null +++ b/test/CodeGen/Hexagon/postinc-offset.ll @@ -0,0 +1,40 @@ +; RUN: llc -enable-aa-sched-mi -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s + +; CHECK: { +; CHECK: ={{ *}}memd([[REG0:(r[0-9]+)]]{{ *}}++{{ *}}#8) +; CHECK-NOT: memw([[REG0]]{{ *}}+{{ *}}#0){{ *}}= +; CHECK: } + +define void @main() #0 { +cond.end.6: + store i32 -1, i32* undef, align 8, !tbaa !0 + br label %polly.stmt.for.body.i + +if.then: + unreachable + +if.end: + ret void + +polly.stmt.for.body.i24: + %0 = extractelement <2 x i32> %add.ip_vec, i32 1 + br i1 undef, label %if.end, label %if.then + 
+polly.stmt.for.body.i: + %add.ip_vec30 = phi <2 x i32> [ %add.ip_vec, %polly.stmt.for.body.i ], [ zeroinitializer, %cond.end.6 ] + %scevgep.phi = phi i32* [ %scevgep.inc, %polly.stmt.for.body.i ], [ undef, %cond.end.6 ] + %polly.indvar = phi i32 [ %polly.indvar_next, %polly.stmt.for.body.i ], [ 0, %cond.end.6 ] + %vector_ptr = bitcast i32* %scevgep.phi to <2 x i32>* + %_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8 + %add.ip_vec = add <2 x i32> %_p_vec_full, %add.ip_vec30 + %polly.indvar_next = add nsw i32 %polly.indvar, 2 + %polly.loop_cond = icmp slt i32 %polly.indvar, 4 + %scevgep.inc = getelementptr i32, i32* %scevgep.phi, i32 2 + br i1 %polly.loop_cond, label %polly.stmt.for.body.i, label %polly.stmt.for.body.i24 +} + +attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = !{!"int", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/signed_immediates.ll b/test/CodeGen/Hexagon/signed_immediates.ll new file mode 100644 index 000000000000..a4766313cc68 --- /dev/null +++ b/test/CodeGen/Hexagon/signed_immediates.ll @@ -0,0 +1,99 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; s4_0Imm +; CHECK: memb(r0++#-1) = r1 +define i8* @foo1(i8* %a, i8 %b) { + store i8 %b, i8* %a + %c = getelementptr i8, i8* %a, i32 -1 + ret i8* %c +} + +; s4_1Imm +; CHECK: memh(r0++#-2) = r1 +define i16* @foo2(i16* %a, i16 %b) { + store i16 %b, i16* %a + %c = getelementptr i16, i16* %a, i32 -1 + ret i16* %c +} + +; s4_2Imm +; CHECK: memw(r0++#-4) = r1 +define i32* @foo3(i32* %a, i32 %b) { + store i32 %b, i32* %a + %c = getelementptr i32, i32* %a, i32 -1 + ret i32* %c +} + +; s4_3Imm +; CHECK: memd(r0++#-8) = r3:2 +define i64* @foo4(i64* %a, i64 %b) { + store i64 %b, i64* %a + %c = getelementptr i64, i64* %a, i32 -1 + ret i64* %c +} + +; s6Ext +; CHECK: if (p0.new) memw(r0+#0)=#-1 +define void @foo5(i32* %a, i1 %b) { +br i1 %b, label %x, label %y +x: + store i32 -1, i32* %a + ret void +y: + ret void +} + +; s10Ext +; CHECK: p0 = cmp.eq(r0, #-1) +define i1 @foo7(i32 %a) { + %b = icmp eq i32 %a, -1 + ret i1 %b +} + +; s11_0Ext +; CHECK: memb(r0+#-1) = r1 +define void @foo8(i8* %a, i8 %b) { + %c = getelementptr i8, i8* %a, i32 -1 + store i8 %b, i8* %c + ret void +} + +; s11_1Ext +; CHECK: memh(r0+#-2) = r1 +define void @foo9(i16* %a, i16 %b) { + %c = getelementptr i16, i16* %a, i32 -1 + store i16 %b, i16* %c + ret void +} + +; s11_2Ext +; CHECK: memw(r0+#-4) = r1 +define void @foo10(i32* %a, i32 %b) { + %c = getelementptr i32, i32* %a, i32 -1 + store i32 %b, i32* %c + ret void +} + +; s11_3Ext +; CHECK: memd(r0+#-8) = r3:2 +define void @foo11(i64* %a, i64 %b) { + %c = getelementptr i64, i64* %a, i32 -1 + store i64 %b, i64* %c + ret void +} + +; s12Ext +; CHECK: if (p0.new) r0 = #-1 +define i32 @foo12(i32 %a, i1 %b) { +br i1 %b, label %x, label %y +x: + ret i32 -1 +y: + ret i32 %a +} + +; s16Ext +; CHECK: r0 = #-2 +define i32 @foo13() { + ret i32 -2 +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/simple_addend.ll b/test/CodeGen/Hexagon/simple_addend.ll new file mode 100644 index 000000000000..ec3a87f1dcc0 --- /dev/null +++ b/test/CodeGen/Hexagon/simple_addend.ll @@ -0,0 +1,10 @@ +; RUN: llc -march=hexagon -filetype=obj -o - < %s | llvm-readobj -relocations | FileCheck %s + +declare void @bar(i32); + +define void @foo(i32 %a) { + %b = mul i32 %a, 3 + call void @bar(i32 %b) + ret void +} +; CHECK: 0x8 R_HEX_B22_PCREL bar 0x4 diff --git a/test/CodeGen/Hexagon/usr-ovf-dep.ll b/test/CodeGen/Hexagon/usr-ovf-dep.ll new file mode 100644 index 000000000000..1f06986f0aa9 --- /dev/null +++ b/test/CodeGen/Hexagon/usr-ovf-dep.ll @@ -0,0 +1,28 @@ +; RUN: llc -O2 < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" +target triple = "hexagon" + +; Check that the two ":sat" instructions are in the same packet. +; CHECK: foo +; CHECK: { +; CHECK: :sat +; CHECK-NEXT: :sat + +target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" +target triple = "hexagon" + +; Function Attrs: nounwind readnone +define i32 @foo(i32 %Rs, i32 %Rt, i32 %Ru) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %Rs, i32 %Ru) + %1 = tail call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %Rt, i32 %Ru) + %add = add nsw i32 %1, %0 + ret i32 %add +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32) #1 + +attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + diff --git a/test/CodeGen/MIR/basic-blocks.mir b/test/CodeGen/MIR/basic-blocks.mir new file mode 100644 index 000000000000..43d87507d5d3 --- /dev/null +++ b/test/CodeGen/MIR/basic-blocks.mir @@ -0,0 +1,43 @@ +# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s +# This test ensures that the MIR parser parses machine functions correctly. + +--- | + + define i32 @foo() { + entry: + ret i32 0 + } + + define i32 @bar() { + start: + ret i32 0 + } + +... +--- +# CHECK: name: foo +# CHECK: body: +# CHECK-NEXT: - name: entry +# CHECK-NEXT: alignment: 0 +# CHECK-NEXT: isLandingPad: false +# CHECK-NEXT: addressTaken: false +name: foo +body: + - name: entry +... +--- +# CHECK: name: bar +# CHECK: body: +# CHECK-NEXT: - name: start +# CHECK-NEXT: alignment: 4 +# CHECK-NEXT: isLandingPad: false +# CHECK-NEXT: addressTaken: false +# CHECK-NEXT: - alignment: 0 +# CHECK-NEXT: isLandingPad: false +# CHECK-NEXT: addressTaken: true +name: bar +body: + - name: start + alignment: 4 + - addressTaken: true +... diff --git a/test/CodeGen/MIR/function-missing-machine-function.mir b/test/CodeGen/MIR/function-missing-machine-function.mir new file mode 100644 index 000000000000..71b5b2845340 --- /dev/null +++ b/test/CodeGen/MIR/function-missing-machine-function.mir @@ -0,0 +1,13 @@ +# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s +# This test verifies that an error is reported when a MIR file has some +# function but is missing a corresponding machine function. + +# CHECK: no machine function information for function 'foo' in the MIR file + +--- | + + define i32 @foo() { + ret i32 0 + } + +... 
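[Editor's note: the MIR tests in this commit all rely on the same two-part file layout: an optional embedded LLVM IR module in a "--- |" block, followed by one YAML document per machine function whose "name" must match a function from that module. As a rough sketch only (not part of this commit; the function name @baz is invented for illustration), a minimal test in this format that the new parser should accept looks like:

# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
# Hypothetical minimal MIR test: an embedded IR module plus one machine function.
--- |

  define i32 @baz() {
  entry:
    ret i32 0
  }

...
---
# CHECK: name: baz
name: baz
body:
  - name: entry
...

Each negative test in this commit removes or misspells one of these pieces (the machine function, the "name" key, or a matching IR function or basic block) and checks the parser's diagnostic.]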
diff --git a/test/CodeGen/MIR/llvm-ir-error-reported.mir b/test/CodeGen/MIR/llvm-ir-error-reported.mir
index 013b28cd7890..3508c341c44d 100644
--- a/test/CodeGen/MIR/llvm-ir-error-reported.mir
+++ b/test/CodeGen/MIR/llvm-ir-error-reported.mir
@@ -4,7 +4,7 @@
--- |

-  ; CHECK: [[@LINE+3]]:15: error: use of undefined value '%a'
+  ; CHECK: [[@LINE+3]]:15: use of undefined value '%a'
  define i32 @foo(i32 %x, i32 %y) {
    %z = alloca i32, align 4
    store i32 %a, i32* %z, align 4
diff --git a/test/CodeGen/MIR/llvmIR.mir b/test/CodeGen/MIR/llvmIR.mir
index 7a7b46b62638..4d7fde240c5b 100644
--- a/test/CodeGen/MIR/llvmIR.mir
+++ b/test/CodeGen/MIR/llvmIR.mir
@@ -30,3 +30,6 @@
  }

...
+---
+name: foo
+...
diff --git a/test/CodeGen/MIR/llvmIRMissing.mir b/test/CodeGen/MIR/llvmIRMissing.mir
index 2acbcd1f9884..83d846ba44c3 100644
--- a/test/CodeGen/MIR/llvmIRMissing.mir
+++ b/test/CodeGen/MIR/llvmIRMissing.mir
@@ -1,5 +1,7 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
# This test ensures that the MIR parser accepts files without the LLVM IR.
---
+# CHECK: name: foo
+name: foo
...
diff --git a/test/CodeGen/MIR/machine-basic-block-unknown-name.mir b/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
new file mode 100644
index 000000000000..4c363c69edbb
--- /dev/null
+++ b/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported whenever the MIR parser can't find
+# a basic block with the machine basic block's name.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name: foo
+body:
+  # CHECK: basic block 'entrie' is not defined in the function 'foo'
+  - name: entrie
+...
diff --git a/test/CodeGen/MIR/machine-function-missing-function.mir b/test/CodeGen/MIR/machine-function-missing-function.mir
new file mode 100644
index 000000000000..eed4142d6597
--- /dev/null
+++ b/test/CodeGen/MIR/machine-function-missing-function.mir
@@ -0,0 +1,19 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when the MIR file has LLVM IR and
+# one of the machine functions has a name that doesn't match any function in
+# the LLVM IR.
+
+--- |
+
+  define i32 @foo() {
+    ret i32 0
+  }
+
+...
+---
+name: foo
+...
+---
+# CHECK: function 'faa' isn't defined in the provided LLVM IR
+name: faa
+...
diff --git a/test/CodeGen/MIR/machine-function-missing-name.mir b/test/CodeGen/MIR/machine-function-missing-name.mir
index 54668f1a5efe..b16156e54bd1 100644
--- a/test/CodeGen/MIR/machine-function-missing-name.mir
+++ b/test/CodeGen/MIR/machine-function-missing-name.mir
@@ -14,7 +14,7 @@
...
---
-# CHECK: [[@LINE+1]]:1: error: missing required key 'name'
+# CHECK: [[@LINE+1]]:1: missing required key 'name'
nme: foo
...
---
diff --git a/test/CodeGen/MIR/machine-function-redefinition-error.mir b/test/CodeGen/MIR/machine-function-redefinition-error.mir
new file mode 100644
index 000000000000..be84161b5630
--- /dev/null
+++ b/test/CodeGen/MIR/machine-function-redefinition-error.mir
@@ -0,0 +1,10 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that the machine function errors are reported correctly.
+
+---
+name: foo
+...
+--- +# CHECK: redefinition of machine function 'foo' +name: foo +... diff --git a/test/CodeGen/MIR/machine-function.mir b/test/CodeGen/MIR/machine-function.mir index 679bfd2d1620..a3c1d1d73927 100644 --- a/test/CodeGen/MIR/machine-function.mir +++ b/test/CodeGen/MIR/machine-function.mir @@ -10,15 +10,49 @@ define i32 @bar() { ret i32 0 } + + define i32 @func() { + ret i32 0 + } + + define i32 @func2() { + ret i32 0 + } ... --- # CHECK: name: foo +# CHECK-NEXT: alignment: +# CHECK-NEXT: exposesReturnsTwice: false +# CHECK-NEXT: hasInlineAsm: false # CHECK-NEXT: ... name: foo ... --- # CHECK: name: bar +# CHECK-NEXT: alignment: +# CHECK-NEXT: exposesReturnsTwice: false +# CHECK-NEXT: hasInlineAsm: false # CHECK-NEXT: ... name: bar ... +--- +# CHECK: name: func +# CHECK-NEXT: alignment: 8 +# CHECK-NEXT: exposesReturnsTwice: false +# CHECK-NEXT: hasInlineAsm: false +# CHECK-NEXT: ... +name: func +alignment: 8 +... +--- +# CHECK: name: func2 +# CHECK-NEXT: alignment: 16 +# CHECK-NEXT: exposesReturnsTwice: true +# CHECK-NEXT: hasInlineAsm: true +# CHECK-NEXT: ... +name: func2 +alignment: 16 +exposesReturnsTwice: true +hasInlineAsm: true +... diff --git a/test/CodeGen/Mips/cconv/callee-saved.ll b/test/CodeGen/Mips/cconv/callee-saved.ll index d0b1e64cdeea..0570ab35fd00 100644 --- a/test/CodeGen/Mips/cconv/callee-saved.ll +++ b/test/CodeGen/Mips/cconv/callee-saved.ll @@ -18,7 +18,7 @@ ; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s ; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s -; Test the the callee-saved registers are callee-saved as specified by section +; Test the callee-saved registers are callee-saved as specified by section ; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec. 
define void @gpr_clobber() nounwind { diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll index fcbd99ef737b..2f843d9da9a6 100644 --- a/test/CodeGen/Mips/eh.ll +++ b/test/CodeGen/Mips/eh.ll @@ -4,7 +4,7 @@ @g1 = global double 0.000000e+00, align 8 @_ZTId = external constant i8* -define void @_Z1fd(double %i2) { +define void @_Z1fd(double %i2) personality i32 (...)* @__gxx_personality_v0 { entry: ; CHECK-EL: addiu $sp, $sp ; CHECK-EL: .cfi_def_cfa_offset @@ -26,7 +26,7 @@ lpad: ; preds = %entry ; CHECK-EL: # %lpad ; CHECK-EL: bne $5 - %exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + %exn.val = landingpad { i8*, i32 } cleanup catch i8* bitcast (i8** @_ZTId to i8*) %exn = extractvalue { i8*, i32 } %exn.val, 0 diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll index dc06ef7840ff..a51cfb7e0fcd 100644 --- a/test/CodeGen/Mips/ehframe-indirect.ll +++ b/test/CodeGen/Mips/ehframe-indirect.ll @@ -7,7 +7,7 @@ @_ZTISt9exception = external constant i8* -define i32 @main() { +define i32 @main() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; ALL: .cfi_startproc ; ALL: .cfi_personality 128, DW.ref.__gxx_personality_v0 @@ -17,8 +17,7 @@ entry: ; ALL: jalr lpad: - %0 = landingpad { i8*, i32 } personality i8* - bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null catch i8* bitcast (i8** @_ZTISt9exception to i8*) ret i32 0 diff --git a/test/CodeGen/Mips/insn-zero-size-bb.ll b/test/CodeGen/Mips/insn-zero-size-bb.ll index 9739c6f17fab..ea61c994ae1d 100644 --- a/test/CodeGen/Mips/insn-zero-size-bb.ll +++ b/test/CodeGen/Mips/insn-zero-size-bb.ll @@ -8,7 +8,7 @@ declare i32 @foo(...) declare void @bar() -define void @main() { +define void @main() personality i8* bitcast (i32 (...)* @foo to i8*) { entry: invoke void @bar() #0 to label %unreachable unwind label %return @@ -19,7 +19,7 @@ unreachable: unreachable return: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @foo to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret void } diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll index 68b584604b27..c3a02261119e 100644 --- a/test/CodeGen/Mips/mips16ex.ll +++ b/test/CodeGen/Mips/mips16ex.ll @@ -9,7 +9,7 @@ @_ZTIi = external constant i8* @.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1 -define i32 @main() { +define i32 @main() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %retval = alloca i32, align 4 %exn.slot = alloca i8* @@ -24,7 +24,7 @@ entry: to label %unreachable unwind label %lpad lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*) %2 = extractvalue { i8*, i32 } %1, 0 store i8* %2, i8** %exn.slot @@ -56,7 +56,7 @@ try.cont: ; preds = %invoke.cont ret i32 0 lpad1: ; preds = %catch - %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %8 = landingpad { i8*, i32 } cleanup %9 = extractvalue { i8*, i32 } %8, 0 store i8* %9, i8** %exn.slot diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll index 5deefe881e3f..c1327274a9cf 100644 --- a/test/CodeGen/NVPTX/access-non-generic.ll +++ b/test/CodeGen/NVPTX/access-non-generic.ll @@ -101,6 +101,28 @@ define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) ret i32 %5 } 
+define void @nested_const_expr() { +; PTX-LABEL: nested_const_expr( + ; store 1 to bitcast(gep(addrspacecast(array), 0, 1)) + store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4 +; PTX: mov.u32 %r1, 1; +; PTX-NEXT: st.shared.u32 [array+4], %r1; + ret void +} + +define void @rauw(float addrspace(1)* %input) { + %generic_input = addrspacecast float addrspace(1)* %input to float* + %addr = getelementptr float, float* %generic_input, i64 10 + %v = load float, float* %addr + store float %v, float* %addr + ret void +; IR-LABEL: @rauw( +; IR-NEXT: %1 = getelementptr float, float addrspace(1)* %input, i64 10 +; IR-NEXT: %v = load float, float addrspace(1)* %1 +; IR-NEXT: store float %v, float addrspace(1)* %1 +; IR-NEXT: ret void +} + declare void @llvm.cuda.syncthreads() #3 attributes #3 = { noduplicate nounwind } diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index c70670da13d6..8ff762aa7c48 100644 --- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -27,8 +27,9 @@ entry: ; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]] ; FIXME: casting A1_REG to A2_REG is unnecessary; A2_REG is essentially A_REG ; CHECK: cvta.global.u64 %rd[[A2_REG:[0-9]+]], %rd[[A1_REG]] +; CHECK: cvta.local.u64 %rd[[SP_REG:[0-9]+]] ; CHECK: ld.global.f32 %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]] -; CHECK: st.f32 [%SP+0], %f[[A0_REG]] +; CHECK: st.local.f32 [{{%rd[0-9]+}}], %f[[A0_REG]] %0 = load float, float* %a, align 4 %1 = bitcast [16 x i8]* %buf to float* @@ -49,7 +50,6 @@ entry: %7 = bitcast i8* %arrayidx7 to float* store float %6, float* %7, align 4 -; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0 ; CHECK: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A2_REG]] ; CHECK-NEXT: .param .b64 param1; diff --git a/test/CodeGen/NVPTX/intrin-nocapture.ll b/test/CodeGen/NVPTX/intrin-nocapture.ll index 55781bb15a0b..2dbd29f616f8 100644 --- a/test/CodeGen/NVPTX/intrin-nocapture.ll +++ b/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -11,7 +11,7 @@ declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*) ; CHECK: @bar define void @bar() { %t1 = alloca i32 -; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) +; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* nonnull %t1) ; CHECK-NEXT: store i32 10, i32* %t1 %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) store i32 10, i32* %t1 diff --git a/test/CodeGen/NVPTX/lower-alloca.ll b/test/CodeGen/NVPTX/lower-alloca.ll new file mode 100644 index 000000000000..397dc1fc52cc --- /dev/null +++ b/test/CodeGen/NVPTX/lower-alloca.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -S -nvptx-lower-alloca -nvptx-favor-non-generic -dce | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +define void @kernel() { +; LABEL: @lower_alloca +; PTX-LABEL: .visible .entry kernel( + %A = alloca i32 +; CHECK: addrspacecast i32* %A to i32 addrspace(5)* +; CHECK: store i32 0, i32 addrspace(5)* {{%.+}} +; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} + store i32 0, i32* %A + call void @callee(i32* %A) + ret void +} + +declare void 
@callee(i32*) + +!nvvm.annotations = !{!0} +!0 = !{void ()* @kernel, !"kernel", i32 1} diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll index 34122912349b..bd496704890f 100644 --- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -19,7 +19,7 @@ target triple = "powerpc64-apple-darwin8" ; CHECK: .cfi_endproc -define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) { +define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality i32 (...)* @__gxx_personality_v0 { entry: %effectiveRange = alloca %struct.Range, align 8 ; <%struct.Range*> [#uses=2] %tmp4 = call i8* @llvm.stacksave() ; <i8*> [#uses=1] @@ -33,7 +33,7 @@ bb30.preheader: ; preds = %entry br label %bb30 unwind: ; preds = %cond_true, %entry - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} catch i8* null call void @llvm.stackrestore(i8* %tmp4) resume { i8*, i32 } %exn diff --git a/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll b/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll new file mode 100644 index 000000000000..37111ef0d89b --- /dev/null +++ b/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll @@ -0,0 +1,91 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX + +@vsc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16 +@vuc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16 +@res_vll = common global <2 x i64> zeroinitializer, align 16 +@res_vull = common global <2 x i64> zeroinitializer, align 16 +@res_vsc = common global <16 x i8> zeroinitializer, align 16 +@res_vuc = common global <16 x i8> zeroinitializer, align 16 + +; Function Attrs: nounwind +define void @test1() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %__b.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vsc, align 16 + %1 = load <16 x i8>, <16 x i8>* @vsc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16 + %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16 + %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3) + store <2 x i64> %4, <2 x i64>* @res_vll, align 16 + ret void +; CHECK-LABEL: @test1 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: lvx [[REG2:[0-9]+]], +; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]] +; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test2() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %__b.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vuc, align 16 + %1 = load <16 x i8>, <16 x i8>* @vuc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16 + %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16 + %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3) 
+ store <2 x i64> %4, <2 x i64>* @res_vull, align 16 + ret void +; CHECK-LABEL: @test2 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: lvx [[REG2:[0-9]+]], +; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]] +; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test3() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vsc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @res_vsc, align 16 + ret void +; CHECK-LABEL: @test3 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: vgbbd {{[0-9]+}}, [[REG1]] +; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test4() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vuc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @res_vuc, align 16 + ret void +; CHECK-LABEL: @test4 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: vgbbd {{[0-9]+}}, [[REG1]] +; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8>, <16 x i8>) + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8>) diff --git a/test/CodeGen/PowerPC/extra-toc-reg-deps.ll b/test/CodeGen/PowerPC/extra-toc-reg-deps.ll index 1056c5a57aac..488771807ce6 100644 --- a/test/CodeGen/PowerPC/extra-toc-reg-deps.ll +++ b/test/CodeGen/PowerPC/extra-toc-reg-deps.ll @@ -61,7 +61,7 @@ target triple = "powerpc64-bgq-linux" @.str28 = external unnamed_addr constant [7 x i8], align 1 @_ZN4Foam4PoutE = external global %"class.Foam::prefixOSstream.27", align 8 -define void @_ZN4Foam13checkTopologyERKNS_8polyMeshEbb(i1 zeroext %allTopology) #0 { +define void @_ZN4Foam13checkTopologyERKNS_8polyMeshEbb(i1 zeroext %allTopology) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: br i1 undef, label %for.body, label %for.cond.cleanup @@ -124,7 +124,7 @@ _ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit: ; preds to label %_ZN4Foam4wordC2EPKcb.exit unwind label %lpad.i lpad.i: ; preds = %_ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } cleanup resume { i8*, i32 } %0 @@ -157,7 +157,7 @@ for.cond.cleanup69: ; preds = %_ZNSsD2Ev.exit br i1 undef, label %if.then121, label %if.else lpad: ; preds = %_ZN4Foam4wordC2EPKcb.exit - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } cleanup br i1 undef, label %_ZNSsD2Ev.exit1578, label %if.then.i.i1570, !prof !1 @@ -181,7 +181,7 @@ if.else: ; preds = %for.cond.cleanup69 to label %_ZN4Foam4wordC2EPKcb.exit1701 unwind label %lpad.i1689 lpad.i1689: ; preds = %if.else - %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %2 = landingpad { i8*, i32 } cleanup unreachable @@ -200,12 +200,12 @@ if.then178: ; preds = %invoke.cont176 unreachable lpad165: ; preds = %_ZN4Foam4wordC2EPKcb.exit1701 - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %3 = landingpad { 
i8*, i32 } cleanup unreachable lpad175: ; preds = %invoke.cont169 - %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %4 = landingpad { i8*, i32 } cleanup invoke void @_ZN4Foam8pointSetD1Ev() to label %eh.resume unwind label %terminate.lpad @@ -215,7 +215,7 @@ if.end213: ; preds = %invoke.cont176 to label %_ZN4Foam4wordC2EPKcb.exit1777 unwind label %lpad.i1765 lpad.i1765: ; preds = %if.end213 - %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %5 = landingpad { i8*, i32 } cleanup br i1 undef, label %eh.resume.i1776, label %if.then.i.i.i1767, !prof !1 @@ -247,12 +247,12 @@ invoke.cont231: ; preds = %_ZNSsD2Ev.exit1792 to label %invoke.cont243 unwind label %lpad230 lpad217: ; preds = %_ZN4Foam4wordC2EPKcb.exit1777 - %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %6 = landingpad { i8*, i32 } cleanup br label %eh.resume lpad230: ; preds = %invoke.cont231, %_ZNSsD2Ev.exit1792 - %7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %7 = landingpad { i8*, i32 } cleanup invoke void @_ZN4Foam7faceSetD1Ev() to label %eh.resume unwind label %terminate.lpad @@ -262,7 +262,7 @@ invoke.cont243: ; preds = %invoke.cont231 to label %_ZN4Foam4wordC2EPKcb.exit1862 unwind label %lpad.i1850 lpad.i1850: ; preds = %invoke.cont243 - %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %8 = landingpad { i8*, i32 } cleanup unreachable @@ -283,7 +283,7 @@ if.then292: ; preds = %_ZNSsD2Ev.exit1877 unreachable lpad276: ; preds = %_ZN4Foam4wordC2EPKcb.exit1862 - %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %9 = landingpad { i8*, i32 } cleanup unreachable @@ -314,7 +314,7 @@ invoke.cont676: ; preds = %invoke.cont674 to label %if.end878 unwind label %lpad663 lpad663: ; preds = %invoke.cont670, %if.end660, %invoke.cont668, %invoke.cont674, %invoke.cont676 - %10 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %10 = landingpad { i8*, i32 } cleanup br i1 undef, label %_ZN4Foam4ListIiED2Ev.exit.i3073, label %delete.notnull.i.i3071 @@ -342,7 +342,7 @@ if.else888: ; preds = %_ZN4Foam11regionSpl to label %_ZN4Foam4wordC2EPKcb.exit3098 unwind label %lpad.i3086 lpad.i3086: ; preds = %if.else888 - %11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %11 = landingpad { i8*, i32 } cleanup unreachable @@ -371,7 +371,7 @@ invoke.cont906: ; preds = %call.i3116.noexc unreachable lpad898: ; preds = %_ZN4Foam4wordC2EPKcb.exit3098 - %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %12 = landingpad { i8*, i32 } cleanup br i1 undef, label %_ZNSsD2Ev.exit3204, label %if.then.i.i3196, !prof !1 @@ -382,7 +382,7 @@ _ZNSsD2Ev.exit3204: ; preds = %lpad898 unreachable lpad905.loopexit.split-lp: ; preds = %call.i3116.noexc, %_ZNSsD2Ev.exit3113 - %lpad.loopexit.split-lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %lpad.loopexit.split-lp = landingpad { i8*, i32 } cleanup invoke void @_ZN4Foam8pointSetD1Ev() to label %eh.resume unwind label %terminate.lpad @@ -391,7 +391,7 @@ eh.resume: ; preds = %_ZN4Foam4ListIiED2E resume { i8*, i32 } undef terminate.lpad: ; preds = %_ZN4Foam4ListIiED2Ev.exit.i3073, %lpad230, %lpad175, %lpad905.loopexit.split-lp - %13 = landingpad { i8*, i32 } 
personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %13 = landingpad { i8*, i32 } catch i8* null unreachable } diff --git a/test/CodeGen/PowerPC/fast-isel-icmp-split.ll b/test/CodeGen/PowerPC/fast-isel-icmp-split.ll index 459616eb9698..e1f22781db3d 100644 --- a/test/CodeGen/PowerPC/fast-isel-icmp-split.ll +++ b/test/CodeGen/PowerPC/fast-isel-icmp-split.ll @@ -9,7 +9,7 @@ target triple = "powerpc64-bgq-linux" %"class.boost::serialization::extended_type_info.129.150" = type { i32 (...)**, i32, i8* } ; Function Attrs: noinline -define void @_ZN5boost13serialization18extended_type_info4findEPKc() #0 align 2 { +define void @_ZN5boost13serialization18extended_type_info4findEPKc() #0 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: br i1 undef, label %cond.true, label %cond.false @@ -42,7 +42,7 @@ if.then: ; preds = %invoke.cont.2 br label %cleanup lpad: ; preds = %cond.end - %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %2 = landingpad { i8*, i32 } cleanup br label %eh.resume diff --git a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll index 66df6bb8669d..88530a3f303f 100644 --- a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll +++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll @@ -17,7 +17,7 @@ target triple = "powerpc64-bgq-linux" declare i32 @__gxx_personality_v0(...) ; Function Attrs: optsize -define void @_ZNSt3__117__assoc_sub_state4copyEv(%"class.std::__1::__assoc_sub_state"* %this) #0 align 2 { +define void @_ZNSt3__117__assoc_sub_state4copyEv(%"class.std::__1::__assoc_sub_state"* %this) #0 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %__lk = alloca %"class.std::__1::unique_lock", align 8 %ref.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8 @@ -50,14 +50,14 @@ invoke.cont4: ; preds = %if.then unreachable lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } cleanup %2 = extractvalue { i8*, i32 } %1, 0 %3 = extractvalue { i8*, i32 } %1, 1 br label %ehcleanup lpad3: ; preds = %if.then - %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %4 = landingpad { i8*, i32 } cleanup %5 = extractvalue { i8*, i32 } %4, 0 %6 = extractvalue { i8*, i32 } %4, 1 diff --git a/test/CodeGen/PowerPC/hello-reloc.s b/test/CodeGen/PowerPC/hello-reloc.s index 97dfbb5362fa..12f4315f675a 100644 --- a/test/CodeGen/PowerPC/hello-reloc.s +++ b/test/CodeGen/PowerPC/hello-reloc.s @@ -2,7 +2,7 @@ ; which is responsible for writing mach-o relocation entries for (PIC) ; PowerPC objects. 
-; RUN: llvm-mc -filetype=obj -relocation-model=pic -mcpu=g4 -triple=powerpc-apple-darwin8 %s -o - | llvm-readobj -relocations | FileCheck -check-prefix=DARWIN-G4-DUMP %s +; RUN: llvm-mc -filetype=obj -relocation-model=pic -mcpu=g4 -triple=powerpc-apple-darwin8 %s -o - | llvm-readobj -r --expand-relocs | FileCheck -check-prefix=DARWIN-G4-DUMP %s .machine ppc7400 .section __TEXT,__textcoal_nt,coalesced,pure_instructions @@ -62,19 +62,79 @@ L_.str: ; @.str ; DARWIN-G4-DUMP:AddressSize: 32bit ; DARWIN-G4-DUMP:Relocations [ ; DARWIN-G4-DUMP: Section __text { -; DARWIN-G4-DUMP: 0x34 1 2 0 PPC_RELOC_BR24 0 0x3 -; DARWIN-G4-DUMP: 0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x74 -; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 0x14 -; DARWIN-G4-DUMP: 0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x74 -; DARWIN-G4-DUMP: 0x60 0 2 n/a PPC_RELOC_PAIR 1 0x14 +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x34 +; DARWIN-G4-DUMP: PCRel: 1 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_BR24 (3) +; DARWIN-G4-DUMP: Section: __picsymbolstub1 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x30 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_LO16_SECTDIFF (11) +; DARWIN-G4-DUMP: Value: 0x74 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x0 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_PAIR (1) +; DARWIN-G4-DUMP: Value: 0x14 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x2C +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_HA16_SECTDIFF (12) +; DARWIN-G4-DUMP: Value: 0x74 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x60 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_PAIR (1) +; DARWIN-G4-DUMP: Value: 0x14 +; DARWIN-G4-DUMP: } ; DARWIN-G4-DUMP: } ; DARWIN-G4-DUMP: Section __picsymbolstub1 { -; DARWIN-G4-DUMP: 0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x70 -; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 0x58 -; DARWIN-G4-DUMP: 0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x70 -; DARWIN-G4-DUMP: 0x18 0 2 n/a PPC_RELOC_PAIR 1 0x58 +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x14 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_LO16_SECTDIFF (11) +; DARWIN-G4-DUMP: Value: 0x70 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x0 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_PAIR (1) +; DARWIN-G4-DUMP: Value: 0x58 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0xC +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_HA16_SECTDIFF (12) +; DARWIN-G4-DUMP: Value: 0x70 +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x18 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_PAIR (1) +; DARWIN-G4-DUMP: Value: 0x58 +; DARWIN-G4-DUMP: } ; DARWIN-G4-DUMP: } ; DARWIN-G4-DUMP: Section __la_symbol_ptr { -; DARWIN-G4-DUMP: 0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper +; DARWIN-G4-DUMP: Relocation { +; DARWIN-G4-DUMP: Offset: 0x0 +; DARWIN-G4-DUMP: PCRel: 0 +; DARWIN-G4-DUMP: Length: 2 +; DARWIN-G4-DUMP: Type: PPC_RELOC_VANILLA (0) +; DARWIN-G4-DUMP: Symbol: dyld_stub_binding_helper +; DARWIN-G4-DUMP: } ; DARWIN-G4-DUMP: } ; 
DARWIN-G4-DUMP:] diff --git a/test/CodeGen/PowerPC/mftb.ll b/test/CodeGen/PowerPC/mftb.ll new file mode 100644 index 000000000000..9ad93267b9dc --- /dev/null +++ b/test/CodeGen/PowerPC/mftb.ll @@ -0,0 +1,72 @@ +; Check handling of the mftb instruction. +; For CPUs 601 and pwr3, the mftb instruction should be emitted. +; On all other CPUs (including generic, ppc, ppc64), the mfspr instruction +; should be used instead. There should no longer be a deprecated warning +; message emitted for this instruction for any CPU. + +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFSPR +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFSPR +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFSPR +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFSPR +; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFSPR +; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=601 < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFTB +; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr3 < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MFTB + +; CHECK-MFSPR-NOT: warning: deprecated +; CHECK-MFTB-NOT: warning: deprecated + +define i32 @get_time() { + %time = call i32 asm "mftb $0, 268", "=r"() + ret i32 %time +; CHECK-MFSPR-LABEL: @get_time +; CHECK-MFSPR: mfspr 3, 268 +; CHECK-MFSPR: blr + +; CHECK-MFTB-LABEL: @get_time +; CHECK-MFTB: mftb 3, 268 +; CHECK-MFTB: blr +} + +define i32 @get_timeu() { + %time = call i32 asm "mftb $0, 269", "=r"() + ret i32 %time +; CHECK-MFSPR-LABEL: @get_timeu +; CHECK-MFSPR: mfspr 3, 269 +; CHECK-MFSPR: blr + +; CHECK-MFTB-LABEL: @get_timeu +; CHECK-MFTB: mftbu 3 +; CHECK-MFTB: blr +} + +define i32 @get_time_e() { + %time = call i32 asm "mftb $0", "=r"() + ret i32 %time +; CHECK-MFSPR-LABEL: @get_time_e +; CHECK-MFSPR: mfspr 3, 268 +; CHECK-MFSPR: blr + +; CHECK-MFTB-LABEL: @get_time_e +; CHECK-MFTB: mftb 3, 268 +; CHECK-MFTB: blr +} + +define i32 @get_timeu_e() { + %time = call i32 asm "mftbu $0", "=r"() + ret i32 %time +; CHECK-MFSPR-LABEL: @get_timeu_e +; CHECK-MFSPR: mfspr 3, 269 +; CHECK-MFSPR: blr + +; CHECK-MFTB-LABEL: @get_timeu_e +; CHECK-MFTB: mftbu 3 +; CHECK-MFTB: blr +} + diff --git a/test/CodeGen/PowerPC/pr18663-2.ll b/test/CodeGen/PowerPC/pr18663-2.ll index 6b54440c4d56..c77291e51451 100644 --- a/test/CodeGen/PowerPC/pr18663-2.ll +++ b/test/CodeGen/PowerPC/pr18663-2.ll @@ -46,7 +46,7 @@ declare void @_ZN4Foam7IOerror4exitEi() #0 ; Function Attrs: inlinehint declare void @_ZN4Foam8fileName12stripInvalidEv() #2 align 2 -define void @_ZN4Foam3CSVINS_6VectorIdEEE4readEv() #0 align 2 { +define void @_ZN4Foam3CSVINS_6VectorIdEEE4readEv() #0 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @_ZN4Foam6string6expandEb() to label %invoke.cont unwind label %lpad @@ -66,7 +66,7 @@ _ZN4Foam6stringC2ERKS0_.exit.i: ; preds = %invoke.cont to label %invoke.cont2 unwind label %lpad.i lpad.i: ; preds = %_ZN4Foam6stringC2ERKS0_.exit.i - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 @@ -90,17 +90,17 @@ memptr.end.i: ; preds = %invoke.cont8 to label %if.end unwind label %lpad5 lpad: ; preds = %if.then.i.i.i.i176, %entry - %1 =
landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 lpad3: ; preds = %invoke.cont2 - %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %2 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 lpad5: ; preds = %memptr.end.i, %invoke.cont8, %if.then - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %3 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 @@ -119,12 +119,12 @@ invoke.cont.i.i.i: ; preds = %.noexc205 unreachable lpad.i.i.i: ; preds = %.noexc205 - %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %4 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 lpad19: ; preds = %for.body - %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %5 = landingpad { i8*, i32 } cleanup br label %ehcleanup142 diff --git a/test/CodeGen/PowerPC/preincprep-invoke.ll b/test/CodeGen/PowerPC/preincprep-invoke.ll index 0e09ff1b774a..8dbce9a3a08e 100644 --- a/test/CodeGen/PowerPC/preincprep-invoke.ll +++ b/test/CodeGen/PowerPC/preincprep-invoke.ll @@ -11,7 +11,7 @@ declare void @_ZN13CStdOutStream5FlushEv() declare i32 @__gxx_personality_v0(...) -define void @_Z11GetPasswordP13CStdOutStreamb() { +define void @_Z11GetPasswordP13CStdOutStreamb() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: br label %for.cond.i.i @@ -41,7 +41,7 @@ for.cond.i.i30: ; preds = %for.cond.i.i30, %in br label %for.cond.i.i30 lpad: ; preds = %invoke.cont4, %invoke.cont, %_ZN11CStringBaseIcEC2EPKc.exit.critedge - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } cleanup resume { i8*, i32 } undef } diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg deleted file mode 100644 index ad9ce2541ef7..000000000000 --- a/test/CodeGen/R600/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True diff --git a/test/CodeGen/SPARC/exception.ll b/test/CodeGen/SPARC/exception.ll index 0af48d0b64b8..f112328346d8 100644 --- a/test/CodeGen/SPARC/exception.ll +++ b/test/CodeGen/SPARC/exception.ll @@ -71,7 +71,7 @@ ; V9PIC: .L_ZTIi.DW.stub: ; V9PIC-NEXT: .xword _ZTIi -define i32 @main(i32 %argc, i8** nocapture readnone %argv) unnamed_addr #0 { +define i32 @main(i32 %argc, i8** nocapture readnone %argv) unnamed_addr #0 personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 { entry: %0 = icmp eq i32 %argc, 2 %1 = tail call i8* @__cxa_allocate_exception(i32 4) #1 @@ -102,7 +102,7 @@ entry: ret i32 %6 "8": ; preds = %"4", %"3" - %exc = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 + %exc = landingpad { i8*, i32 } catch %struct.__fundamental_type_info_pseudo* @_ZTIi catch %struct.__fundamental_type_info_pseudo* @_ZTIf %exc_ptr12 = extractvalue { i8*, i32 } %exc, 0 diff --git a/test/CodeGen/SPARC/obj-relocs.ll b/test/CodeGen/SPARC/obj-relocs.ll index 115263ac5d46..0e7e04032be0 100644 --- a/test/CodeGen/SPARC/obj-relocs.ll +++ b/test/CodeGen/SPARC/obj-relocs.ll @@ -1,31 +1,37 @@ ; RUN: llc < %s -march=sparcv9 -filetype=obj --relocation-model=static | llvm-readobj -r | FileCheck %s --check-prefix=CHECK-ABS ; RUN: llc < %s -march=sparcv9 -filetype=obj --relocation-model=pic | llvm-readobj -r | FileCheck %s --check-prefix=CHECK-PIC -;CHECK-ABS: 
Relocations [ -;CHECK-ABS: 0x{{[0-9,A-F]+}} R_SPARC_H44 AGlobalVar 0x0 -;CHECK-ABS: 0x{{[0-9,A-F]+}} R_SPARC_M44 AGlobalVar 0x0 -;CHECK-ABS: 0x{{[0-9,A-F]+}} R_SPARC_L44 AGlobalVar 0x0 -;CHECK-ABS: 0x{{[0-9,A-F]+}} R_SPARC_WDISP30 bar 0x0 -;CHECK-ABS:] +;CHECK-ABS: Relocations [ +;CHECK-ABS: 0x{{[0-9,A-F]+}} R_SPARC_H44 AGlobalVar 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_M44 AGlobalVar 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_L44 AGlobalVar 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_H44 .rodata.str1.1 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_M44 .rodata.str1.1 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_WDISP30 bar 0x0 +;CHECK-ABS-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_L44 .rodata.str1.1 0x0 +;CHECK-ABS: ] -; CHECK-PIC: Relocations [ -; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4 -; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8 -; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 AGlobalVar 0x0 -; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 AGlobalVar 0x0 -; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 bar 0x0 -; CHECK-PIC: ] +; CHECK-PIC: Relocations [ +; CHECK-PIC: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 AGlobalVar 0x0 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 AGlobalVar 0x0 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 .L.mystr 0x0 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 .L.mystr 0x0 +; CHECK-PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 bar 0x0 +; CHECK-PIC: ] @AGlobalVar = global i64 0, align 8 +@.mystr = private unnamed_addr constant [6 x i8] c"hello\00", align 1 define i64 @foo(i64 %a) { entry: %0 = load i64, i64* @AGlobalVar, align 4 %1 = add i64 %a, %0 - %2 = call i64 @bar(i64 %1) + %2 = call i64 @bar(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.mystr, i32 0, i32 0), i64 %1) ret i64 %2 } -declare i64 @bar(i64) +declare i64 @bar(i8*, i64) diff --git a/test/CodeGen/Thumb/sjljehprepare-lower-vector.ll b/test/CodeGen/Thumb/sjljehprepare-lower-vector.ll index ab082c79ba6f..605fe4627c99 100644 --- a/test/CodeGen/Thumb/sjljehprepare-lower-vector.ll +++ b/test/CodeGen/Thumb/sjljehprepare-lower-vector.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios" -define i8* @foo(<4 x i32> %c) { +define i8* @foo(<4 x i32> %c) personality i8* bitcast (i32 (...)* @baz to i8*) { entry: invoke void @bar () to label %unreachable unwind label %handler @@ -13,7 +13,7 @@ unreachable: unreachable handler: - %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @baz to i8*) + %tmp = landingpad { i8*, i32 } cleanup resume { i8*, i32 } undef } diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll index a64d72e86efb..583849195e61 100644 --- a/test/CodeGen/Thumb2/constant-islands.ll +++ b/test/CodeGen/Thumb2/constant-islands.ll @@ -76,7 +76,7 @@ declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2 -define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 { +define %class.RagDoll* 
@_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) { entry: %retval = alloca %class.RagDoll*, align 4 %this.addr = alloca %class.RagDoll*, align 4 @@ -635,7 +635,7 @@ for.inc: ; preds = %for.body br label %for.cond lpad: ; preds = %entry - %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %67 = landingpad { i8*, i32 } cleanup %68 = extractvalue { i8*, i32 } %67, 0 store i8* %68, i8** %exn.slot @@ -648,7 +648,7 @@ invoke.cont4: ; preds = %lpad br label %eh.resume lpad8: ; preds = %invoke.cont - %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %70 = landingpad { i8*, i32 } cleanup %71 = extractvalue { i8*, i32 } %70, 0 store i8* %71, i8** %exn.slot @@ -661,7 +661,7 @@ invoke.cont11: ; preds = %lpad8 br label %eh.resume lpad17: ; preds = %invoke.cont9 - %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %73 = landingpad { i8*, i32 } cleanup %74 = extractvalue { i8*, i32 } %73, 0 store i8* %74, i8** %exn.slot @@ -674,7 +674,7 @@ invoke.cont20: ; preds = %lpad17 br label %eh.resume lpad26: ; preds = %invoke.cont18 - %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %76 = landingpad { i8*, i32 } cleanup %77 = extractvalue { i8*, i32 } %76, 0 store i8* %77, i8** %exn.slot @@ -687,7 +687,7 @@ invoke.cont29: ; preds = %lpad26 br label %eh.resume lpad35: ; preds = %invoke.cont27 - %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %79 = landingpad { i8*, i32 } cleanup %80 = extractvalue { i8*, i32 } %79, 0 store i8* %80, i8** %exn.slot @@ -700,7 +700,7 @@ invoke.cont38: ; preds = %lpad35 br label %eh.resume lpad44: ; preds = %invoke.cont36 - %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %82 = landingpad { i8*, i32 } cleanup %83 = extractvalue { i8*, i32 } %82, 0 store i8* %83, i8** %exn.slot @@ -713,7 +713,7 @@ invoke.cont47: ; preds = %lpad44 br label %eh.resume lpad53: ; preds = %invoke.cont45 - %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %85 = landingpad { i8*, i32 } cleanup %86 = extractvalue { i8*, i32 } %85, 0 store i8* %86, i8** %exn.slot @@ -726,7 +726,7 @@ invoke.cont56: ; preds = %lpad53 br label %eh.resume lpad62: ; preds = %invoke.cont54 - %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %88 = landingpad { i8*, i32 } cleanup %89 = extractvalue { i8*, i32 } %88, 0 store i8* %89, i8** %exn.slot @@ -739,7 +739,7 @@ invoke.cont65: ; preds = %lpad62 br label %eh.resume lpad71: ; preds = %invoke.cont63 - %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %91 = landingpad { i8*, i32 } cleanup %92 = extractvalue { i8*, i32 } %91, 0 store i8* %92, i8** %exn.slot @@ -752,7 +752,7 @@ invoke.cont74: ; preds = %lpad71 br label %eh.resume lpad80: ; preds = %invoke.cont72 - %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %94 = landingpad { i8*, i32 } cleanup %95 = extractvalue { i8*, i32 } %94, 0 store i8* %95, i8** %exn.slot @@ -765,7 +765,7 @@ invoke.cont83: ; preds = %lpad80 br label %eh.resume lpad89: ; preds = 
%invoke.cont81 - %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %97 = landingpad { i8*, i32 } cleanup %98 = extractvalue { i8*, i32 } %97, 0 store i8* %98, i8** %exn.slot @@ -1264,7 +1264,7 @@ invoke.cont517: ; preds = %invoke.cont488 ret %class.RagDoll* %200 lpad258: ; preds = %for.end - %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %201 = landingpad { i8*, i32 } cleanup %202 = extractvalue { i8*, i32 } %201, 0 store i8* %202, i8** %exn.slot @@ -1274,7 +1274,7 @@ lpad258: ; preds = %for.end br label %eh.resume lpad284: ; preds = %invoke.cont259 - %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %204 = landingpad { i8*, i32 } cleanup %205 = extractvalue { i8*, i32 } %204, 0 store i8* %205, i8** %exn.slot @@ -1284,7 +1284,7 @@ lpad284: ; preds = %invoke.cont259 br label %eh.resume lpad313: ; preds = %invoke.cont285 - %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %207 = landingpad { i8*, i32 } cleanup %208 = extractvalue { i8*, i32 } %207, 0 store i8* %208, i8** %exn.slot @@ -1294,7 +1294,7 @@ lpad313: ; preds = %invoke.cont285 br label %eh.resume lpad342: ; preds = %invoke.cont314 - %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %210 = landingpad { i8*, i32 } cleanup %211 = extractvalue { i8*, i32 } %210, 0 store i8* %211, i8** %exn.slot @@ -1304,7 +1304,7 @@ lpad342: ; preds = %invoke.cont314 br label %eh.resume lpad371: ; preds = %invoke.cont343 - %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %213 = landingpad { i8*, i32 } cleanup %214 = extractvalue { i8*, i32 } %213, 0 store i8* %214, i8** %exn.slot @@ -1314,7 +1314,7 @@ lpad371: ; preds = %invoke.cont343 br label %eh.resume lpad400: ; preds = %invoke.cont372 - %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %216 = landingpad { i8*, i32 } cleanup %217 = extractvalue { i8*, i32 } %216, 0 store i8* %217, i8** %exn.slot @@ -1324,7 +1324,7 @@ lpad400: ; preds = %invoke.cont372 br label %eh.resume lpad429: ; preds = %invoke.cont401 - %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %219 = landingpad { i8*, i32 } cleanup %220 = extractvalue { i8*, i32 } %219, 0 store i8* %220, i8** %exn.slot @@ -1334,7 +1334,7 @@ lpad429: ; preds = %invoke.cont401 br label %eh.resume lpad458: ; preds = %invoke.cont430 - %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %222 = landingpad { i8*, i32 } cleanup %223 = extractvalue { i8*, i32 } %222, 0 store i8* %223, i8** %exn.slot @@ -1344,7 +1344,7 @@ lpad458: ; preds = %invoke.cont430 br label %eh.resume lpad487: ; preds = %invoke.cont459 - %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %225 = landingpad { i8*, i32 } cleanup %226 = extractvalue { i8*, i32 } %225, 0 store i8* %226, i8** %exn.slot @@ -1354,7 +1354,7 @@ lpad487: ; preds = %invoke.cont459 br label %eh.resume lpad516: ; preds = %invoke.cont488 - %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %228 = landingpad { i8*, i32 } cleanup %229 = extractvalue { i8*, i32 } %228, 0 store i8* %229, i8** %exn.slot @@ -1371,7 +1371,7 @@ eh.resume: ; preds = %lpad516, %lpad487, resume { i8*, i32 } 
%lpad.val526 terminate.lpad: ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad - %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + %231 = landingpad { i8*, i32 } catch i8* null call void @_ZSt9terminatev() noreturn nounwind unreachable diff --git a/test/CodeGen/WinEH/cppeh-alloca-sink.ll b/test/CodeGen/WinEH/cppeh-alloca-sink.ll index d50237fa78a7..cc6cec9e4d69 100644 --- a/test/CodeGen/WinEH/cppeh-alloca-sink.ll +++ b/test/CodeGen/WinEH/cppeh-alloca-sink.ll @@ -51,7 +51,7 @@ $"\01??_R0H@8" = comdat any @llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" ; Function Attrs: uwtable -define void @sink_alloca_to_catch() #0 { +define void @sink_alloca_to_catch() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %0 = alloca i32 %only_used_in_catch = alloca i32, align 4 @@ -59,7 +59,7 @@ entry: to label %try.cont unwind label %lpad lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %1 = landingpad { i8*, i32 } catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0 %2 = extractvalue { i8*, i32 } %1, 1 %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3 @@ -86,7 +86,7 @@ eh.resume: ; preds = %lpad declare void @use_catch_var(i32*) #1 ; Function Attrs: uwtable -define void @dont_sink_alloca_to_catch(i32 %n) #0 { +define void @dont_sink_alloca_to_catch(i32 %n) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %0 = alloca i32 %n.addr = alloca i32, align 4 @@ -109,7 +109,7 @@ invoke.cont: ; preds = %while.body br label %try.cont lpad: ; preds = %while.body - %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %2 = landingpad { i8*, i32 } catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*) %3 = extractvalue { i8*, i32 } %2, 0 store i8* %3, i8** %exn.slot @@ -141,7 +141,7 @@ try.cont: ; preds = %invoke.cont2, %invo br label %while.cond lpad1: ; preds = %catch - %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %8 = landingpad { i8*, i32 } cleanup %9 = extractvalue { i8*, i32 } %8, 0 store i8* %9, i8** %exn.slot diff --git a/test/CodeGen/WinEH/cppeh-catch-all.ll b/test/CodeGen/WinEH/cppeh-catch-all.ll index a6c94d400797..266dd3e305ca 100644 --- a/test/CodeGen/WinEH/cppeh-catch-all.ll +++ b/test/CodeGen/WinEH/cppeh-catch-all.ll @@ -25,7 +25,7 @@ target triple = "x86_64-pc-windows-msvc" ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]] ; Function Attrs: uwtable -define void @_Z4testv() #0 { +define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %exn.slot = alloca i8* %ehselector.slot = alloca i32 @@ -36,13 +36,13 @@ invoke.cont: ; preds = %entry br label %try.cont ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry -; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) 
@llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @_Z4testv.catch) ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont] lpad: ; preds = %entry - %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %tmp = landingpad { i8*, i32 } catch i8* null %tmp1 = extractvalue { i8*, i32 } %tmp, 0 store i8* %tmp1, i8** %exn.slot diff --git a/test/CodeGen/WinEH/cppeh-catch-and-throw.ll b/test/CodeGen/WinEH/cppeh-catch-and-throw.ll index c60a339f6ba2..240ca987690d 100644 --- a/test/CodeGen/WinEH/cppeh-catch-and-throw.ll +++ b/test/CodeGen/WinEH/cppeh-catch-and-throw.ll @@ -50,7 +50,7 @@ $_TI1H = comdat any ; CHECK: } ; Function Attrs: uwtable -define void @"\01?test@@YAXXZ"() #0 { +define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %o = alloca %class.Obj, align 1 %tmp = alloca i32, align 4 @@ -62,7 +62,7 @@ entry: to label %unreachable unwind label %lpad lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %1 = landingpad { i8*, i32 } catch i8* null %2 = extractvalue { i8*, i32 } %1, 0 store i8* %2, i8** %exn.slot @@ -78,7 +78,7 @@ catch: ; preds = %lpad to label %unreachable unwind label %lpad1 lpad1: ; preds = %catch - %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %4 = landingpad { i8*, i32 } cleanup %5 = extractvalue { i8*, i32 } %4, 0 store i8* %5, i8** %exn.slot @@ -113,7 +113,7 @@ unreachable: ; preds = %catch, %entry ; CHECK: [[SPLIT_LABEL]] ; ; CHECK: [[LPAD_LABEL]] -; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: landingpad { i8*, i32 } ; CHECK: cleanup ; CHECK: unreachable ; CHECK: } diff --git a/test/CodeGen/WinEH/cppeh-catch-scalar.ll b/test/CodeGen/WinEH/cppeh-catch-scalar.ll index 4faef82a75fc..172502cf73c8 100644 --- a/test/CodeGen/WinEH/cppeh-catch-scalar.ll +++ b/test/CodeGen/WinEH/cppeh-catch-scalar.ll @@ -29,7 +29,7 @@ target triple = "x86_64-pc-windows-msvc" ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]] ; Function Attrs: uwtable -define void @_Z4testv() #0 { +define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %exn.slot = alloca i8* %ehselector.slot = alloca i32 @@ -41,13 +41,13 @@ invoke.cont: ; preds = %entry br label %try.cont ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry -; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) 
@llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch) ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont] lpad: ; preds = %entry - %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %tmp = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*) %tmp1 = extractvalue { i8*, i32 } %tmp, 0 store i8* %tmp1, i8** %exn.slot diff --git a/test/CodeGen/WinEH/cppeh-catch-unwind.ll b/test/CodeGen/WinEH/cppeh-catch-unwind.ll index 0fd735be57a1..6fd70d84b2af 100644 --- a/test/CodeGen/WinEH/cppeh-catch-unwind.ll +++ b/test/CodeGen/WinEH/cppeh-catch-unwind.ll @@ -31,7 +31,7 @@ $"\01??_R0H@8" = comdat any @"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat -; CHECK-LABEL: define void @"\01?test@@YAXXZ"() #0 { +; CHECK-LABEL: define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { ; CHECK: entry: ; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass ; CHECK: [[TMP0:\%.+]] = alloca i32, align 4 @@ -41,7 +41,7 @@ $"\01??_R0H@8" = comdat any ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]] ; Function Attrs: uwtable -define void @"\01?test@@YAXXZ"() #0 { +define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %obj = alloca %class.SomeClass, align 1 %0 = alloca i32, align 4 @@ -66,27 +66,27 @@ invoke.cont2: ; preds = %invoke.cont to label %try.cont unwind label %lpad3 ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry -; CHECK: [[LPAD_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: [[LPAD_VAL:\%.+]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch") ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont15] lpad: ; preds = %entry - %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %2 = landingpad { i8*, i32 } catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) %3 = extractvalue { i8*, i32 } %2, 0 %4 = extractvalue { i8*, i32 } %2, 1 br label %catch.dispatch7 ; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %invoke.cont -; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) 
@llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch") ; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont15] lpad1: ; preds = %invoke.cont - %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %5 = landingpad { i8*, i32 } cleanup catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) %6 = extractvalue { i8*, i32 } %5, 0 @@ -94,14 +94,14 @@ lpad1: ; preds = %invoke.cont br label %ehcleanup ; CHECK: [[LPAD3_LABEL]]:{{[ ]+}}; preds = %invoke.cont2 -; CHECK: [[LPAD3_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: [[LPAD3_VAL:\%.+]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup") ; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont15] lpad3: ; preds = %invoke.cont2 - %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %8 = landingpad { i8*, i32 } cleanup catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) %9 = extractvalue { i8*, i32 } %8, 0 @@ -128,7 +128,7 @@ try.cont: ; preds = %invoke.cont2, %invo ; CHECK-NOT: lpad5: lpad5: ; preds = %catch - %13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %13 = landingpad { i8*, i32 } cleanup catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) %14 = extractvalue { i8*, i32 } %13, 0 @@ -202,7 +202,7 @@ eh.resume: ; preds = %catch.dispatch7 ; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont) ; ; CHECK: [[LPAD5_LABEL]]:{{[ ]+}}; preds = %entry -; CHECK: [[LPAD5_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: [[LPAD5_VAL:\%.+]] = landingpad { i8*, i32 } ; CHECK: cleanup ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK: } diff --git a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll index 5a570431510f..7e5f659f2a4f 100644 --- a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll +++ b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll @@ -26,7 +26,7 @@ $"\01??_R0H@8" = comdat any @"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat @llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" -define i32 @main() { +define i32 @main() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %o = alloca %struct.HasDtor, align 1 invoke void @may_throw() @@ -37,14 +37,14 @@ invoke.cont2: ; preds = %invoke.cont br label %try.cont lpad: ; preds = %entry - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %0 = landingpad { i8*, i32 } catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0 %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %catch.dispatch lpad1: ; preds = %invoke.cont - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to 
i8*) + %3 = landingpad { i8*, i32 } cleanup catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0 %4 = extractvalue { i8*, i32 } %3, 0 diff --git a/test/CodeGen/WinEH/cppeh-demote-liveout.ll b/test/CodeGen/WinEH/cppeh-demote-liveout.ll index 48d9b39ca64a..309952bfc94b 100644 --- a/test/CodeGen/WinEH/cppeh-demote-liveout.ll +++ b/test/CodeGen/WinEH/cppeh-demote-liveout.ll @@ -19,14 +19,14 @@ declare i32 @llvm.eh.typeid.for(i8*) @typeinfo.int = external global i32 -define i32 @liveout_catch(i32 %p) { +define i32 @liveout_catch(i32 %p) personality i32 (...)* @__CxxFrameHandler3 { entry: %val.entry = add i32 %p, 1 invoke void @might_throw() to label %ret unwind label %lpad lpad: - %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__CxxFrameHandler3 + %ehvals = landingpad { i8*, i32 } cleanup catch i32* @typeinfo.int %ehptr = extractvalue { i8*, i32 } %ehvals, 0 diff --git a/test/CodeGen/WinEH/cppeh-frame-vars.ll b/test/CodeGen/WinEH/cppeh-frame-vars.ll index eeda4319a6e6..1077ad0b8765 100644 --- a/test/CodeGen/WinEH/cppeh-frame-vars.ll +++ b/test/CodeGen/WinEH/cppeh-frame-vars.ll @@ -62,7 +62,7 @@ $"\01??_R0H@8" = comdat any ; CHECK: br label %for.cond ; Function Attrs: uwtable -define void @"\01?test@@YAXXZ"() #0 { +define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %NumExceptions = alloca i32, align 4 %ExceptionVal = alloca [10 x i32], align 16 @@ -99,13 +99,13 @@ invoke.cont: ; preds = %for.body br label %try.cont ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %for.body -; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch") ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont] lpad: ; preds = %for.body - %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %tmp4 = landingpad { i8*, i32 } catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) %tmp5 = extractvalue { i8*, i32 } %tmp4, 0 store i8* %tmp5, i8** %exn.slot diff --git a/test/CodeGen/WinEH/cppeh-inalloca.ll b/test/CodeGen/WinEH/cppeh-inalloca.ll index 13471b8661a3..3dc1348efffa 100644 --- a/test/CodeGen/WinEH/cppeh-inalloca.ll +++ b/test/CodeGen/WinEH/cppeh-inalloca.ll @@ -45,7 +45,7 @@ $"\01??_R0H@8" = comdat any ; CHECK: invoke void @"\01?may_throw@@YAXXZ"() ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]] -define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 { +define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: %retval = alloca i32, align 4 %exn.slot = alloca i8* @@ -59,14 +59,14 @@ invoke.cont: ; preds = %entry br label %try.cont ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry -; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) ; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...) 
diff --git a/test/CodeGen/WinEH/cppeh-inalloca.ll b/test/CodeGen/WinEH/cppeh-inalloca.ll
index 13471b8661a3..3dc1348efffa 100644
--- a/test/CodeGen/WinEH/cppeh-inalloca.ll
+++ b/test/CodeGen/WinEH/cppeh-inalloca.ll
@@ -45,7 +45,7 @@ $"\01??_R0H@8" = comdat any
 ; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

-define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 {
+define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %retval = alloca i32, align 4
   %exn.slot = alloca i8*
@@ -59,14 +59,14 @@ invoke.cont: ; preds = %entry
   br label %try.cont

 ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: cleanup
 ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
 ; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAHUA@@@Z.catch", i32 0, void (i8*, i8*)* @"\01?test@@YAHUA@@@Z.cleanup")
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %cleanup]

 lpad: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %1 = landingpad { i8*, i32 }
           cleanup
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
   %2 = extractvalue { i8*, i32 } %1, 0
diff --git a/test/CodeGen/WinEH/cppeh-min-unwind.ll b/test/CodeGen/WinEH/cppeh-min-unwind.ll
index 3fffa47a081b..b1f157ade29b 100644
--- a/test/CodeGen/WinEH/cppeh-min-unwind.ll
+++ b/test/CodeGen/WinEH/cppeh-min-unwind.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-pc-windows-msvc"
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

 ; Function Attrs: uwtable
-define void @_Z4testv() #0 {
+define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %obj = alloca %class.SomeClass, align 4
   %exn.slot = alloca i8*
@@ -44,13 +44,13 @@ invoke.cont: ; preds = %entry
   ret void

 ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: cleanup
 ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @_Z4testv.cleanup)
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], []

 lpad: ; preds = %entry
-  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %tmp = landingpad { i8*, i32 }
           cleanup
   %tmp1 = extractvalue { i8*, i32 } %tmp, 0
   store i8* %tmp1, i8** %exn.slot
diff --git a/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll b/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
index 52f613276d54..1294d0b8ff30 100644
--- a/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
+++ b/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
@@ -35,7 +35,7 @@ target triple = "x86_64-pc-windows-msvc"
 ; CHECK: }

 ; Function Attrs: nounwind uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %o = alloca %class.Obj, align 1
   %exn.slot = alloca i8*
@@ -48,7 +48,7 @@ invoke.cont: ; preds = %entry
   br label %try.cont

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   store i8* %1, i8** %exn.slot
diff --git a/test/CodeGen/WinEH/cppeh-multi-catch.ll b/test/CodeGen/WinEH/cppeh-multi-catch.ll
index 28340c60ad1e..25224551cadc 100644
--- a/test/CodeGen/WinEH/cppeh-multi-catch.ll
+++ b/test/CodeGen/WinEH/cppeh-multi-catch.ll
@@ -45,7 +45,7 @@ $"\01??_R0?AVSomeClass@@@8" = comdat any
 @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" = private unnamed_addr constant %eh.HandlerMapEntry { i32 8, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor15* @"\01??_R0?AVSomeClass@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"

-; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 ; CHECK: entry:
 ; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass*, align 8
 ; CHECK: [[LL_PTR:\%.+]] = alloca i64, align 8
@@ -55,7 +55,7 @@ $"\01??_R0?AVSomeClass@@@8" = comdat any
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

 ; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -69,7 +69,7 @@ invoke.cont: ; preds = %entry
   br label %try.cont

 ; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
 ; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
 ; CHECK-NEXT: catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
@@ -82,7 +82,7 @@ invoke.cont: ; preds = %entry
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %ret]

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
           catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
           catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
diff --git a/test/CodeGen/WinEH/cppeh-nested-1.ll b/test/CodeGen/WinEH/cppeh-nested-1.ll
index 2b13510c5745..a5e80ac2b2ab 100644
--- a/test/CodeGen/WinEH/cppeh-nested-1.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-1.ll
@@ -39,7 +39,7 @@ $"\01??_R0H@8" = comdat any
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

 ; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -52,14 +52,14 @@ invoke.cont: ; preds = %entry
   br label %try.cont

 ; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
 ; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
 ; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont, label %try.cont10]

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
   %1 = extractvalue { i8*, i32 } %0, 0
@@ -94,7 +94,7 @@ try.cont: ; preds = %invoke.cont2, %invo
 ; CHECK-NOT: lpad1:

 lpad1: ; preds = %catch
-  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %6 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
   %7 = extractvalue { i8*, i32 } %6, 0
   store i8* %7, i8** %exn.slot
@@ -155,7 +155,7 @@ eh.resume: ; %catch.dispatch3
 ; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
 ;
 ; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 }
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
 ; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
 ; CHECK: indirectbr i8* [[RECOVER1]], []
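As the CHECK lines above suggest, WinEH preparation appears to flatten each landing pad's dispatch plan into a variadic @llvm.eh.actions call: an i32 1 group reads as a catch action (typeinfo, what looks like a catch-object slot or state number, then the outlined handler), and an i32 0 group as a cleanup action (handler only); the returned pointer then drives an indirectbr over the possible continuation blocks. A hand-written sketch of that shape, with hypothetical handler names @demo.catch and @demo.cleanup:

  ; one catch action for int, then one cleanup action (names assumed)
  %recover = call i8* (...) @llvm.eh.actions(
      i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @demo.catch,
      i32 0, void (i8*, i8*)* @demo.cleanup)
  indirectbr i8* %recover, [label %try.cont]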
diff --git a/test/CodeGen/WinEH/cppeh-nested-2.ll b/test/CodeGen/WinEH/cppeh-nested-2.ll
index f12f3dbed085..385958b006d2 100644
--- a/test/CodeGen/WinEH/cppeh-nested-2.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-2.ll
@@ -49,7 +49,7 @@ target triple = "x86_64-pc-windows-msvc"
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

 ; Function Attrs: uwtable
-define void @_Z4testv() #0 {
+define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %outer = alloca %class.Outer, align 1
   %exn.slot = alloca i8*
@@ -91,13 +91,13 @@ invoke.cont5: ; preds = %invoke.cont4
   br label %try.cont

 ; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
 ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont19]

 lpad: ; preds = %try.cont, %entry
-  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %tmp = landingpad { i8*, i32 }
           catch i8* bitcast (i8** @_ZTIf to i8*)
   %tmp1 = extractvalue { i8*, i32 } %tmp, 0
   store i8* %tmp1, i8** %exn.slot
@@ -106,7 +106,7 @@ lpad: ; preds = %try.cont, %entry
   br label %catch.dispatch11

 ; CHECK: [[LPAD1_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: cleanup
 ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
@@ -117,7 +117,7 @@ lpad: ; preds = %try.cont, %entry
 ; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont, label %try.cont19]

 lpad1: ; preds = %invoke.cont4, %invoke.cont
-  %tmp3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %tmp3 = landingpad { i8*, i32 }
           cleanup
           catch i8* bitcast (i8** @_ZTIi to i8*)
           catch i8* bitcast (i8** @_ZTIf to i8*)
@@ -128,7 +128,7 @@ lpad1: ; preds = %invoke.cont4, %invo
   br label %catch.dispatch

 ; CHECK: [[LPAD3_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: cleanup
 ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
@@ -140,7 +140,7 @@ lpad1: ; preds = %invoke.cont4, %invo
 ; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont19]

 lpad3: ; preds = %invoke.cont2
-  %tmp6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %tmp6 = landingpad { i8*, i32 }
           cleanup
           catch i8* bitcast (i8** @_ZTIi to i8*)
           catch i8* bitcast (i8** @_ZTIf to i8*)
@@ -189,7 +189,7 @@ invoke.cont9: ; preds = %try.cont
 ; CHECK-NOT: lpad7:

 lpad7: ; preds = %catch
-  %tmp14 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %tmp14 = landingpad { i8*, i32 }
           cleanup
           catch i8* bitcast (i8** @_ZTIf to i8*)
   %tmp15 = extractvalue { i8*, i32 } %tmp14, 0
@@ -263,7 +263,7 @@ eh.resume: ; preds = %catch.dispatch11
 ; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
 ;
 ; CHECK: [[LPAD7_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD7_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: [[LPAD7_VAL:\%.+]] = landingpad { i8*, i32 }
 ; (FIXME) The nested handler body isn't being populated yet.
 ; CHECK: }
diff --git a/test/CodeGen/WinEH/cppeh-nested-3.ll b/test/CodeGen/WinEH/cppeh-nested-3.ll
index c96abcc6e81c..33faaf0f591a 100644
--- a/test/CodeGen/WinEH/cppeh-nested-3.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-3.ll
@@ -46,7 +46,7 @@ $"\01??_R0H@8" = comdat any
 ; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]

 ; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -60,14 +60,14 @@ invoke.cont: ; preds = %entry
   br label %try.cont10

 ; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
 ; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.2", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
 ; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont10, label %try.cont19]

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
   %1 = extractvalue { i8*, i32 } %0, 0
@@ -97,7 +97,7 @@ invoke.cont2: ; preds = %catch
 ; CHECK-NOT: lpad1:

 lpad1: ; preds = %catch
-  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %5 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
   %6 = extractvalue { i8*, i32 } %5, 0
@@ -139,7 +139,7 @@ try.cont10: ; preds = %invoke.cont9, %invo
 ; CHECK-NOT: lpad8:

 lpad8: ; preds = %try.cont
-  %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %12 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
   %13 = extractvalue { i8*, i32 } %12, 0
   store i8* %13, i8** %exn.slot
@@ -212,7 +212,7 @@ eh.resume: ; preds = %lpad16, %catch.disp
 ; CHECK: to label %invoke.cont9 unwind label %[[LPAD8_LABEL:lpad[0-9]*]]
 ;
 ; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 }
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
 ; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
@@ -222,7 +222,7 @@ eh.resume: ; preds = %lpad16, %catch.disp
 ; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
 ;
 ; CHECK: [[LPAD8_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
-; CHECK: [[LPAD8_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: [[LPAD8_VAL:\%.+]] = landingpad { i8*, i32 }
 ; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
 ; CHECK: [[RECOVER2:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
 ; CHECK: indirectbr i8* [[RECOVER2]], []
diff --git a/test/CodeGen/WinEH/cppeh-nested-rethrow.ll b/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
index 60b404113345..14a5f233f9ba 100644
--- a/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
@@ -56,7 +56,7 @@ $_TI1H = comdat any
 ; CHECK: call void (...) @llvm.frameescape

 ; Function Attrs: nounwind uwtable
-define void @"\01?test1@@YAXXZ"() #0 {
+define void @"\01?test1@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %tmp = alloca i32, align 4
   %exn.slot = alloca i8*
@@ -67,7 +67,7 @@ entry:
     to label %unreachable unwind label %lpad

 lpad: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %1 = landingpad { i8*, i32 }
           catch i8* null
   %2 = extractvalue { i8*, i32 } %1, 0
   store i8* %2, i8** %exn.slot
@@ -82,7 +82,7 @@ catch: ; preds = %lpad
     to label %unreachable unwind label %lpad1

 lpad1: ; preds = %catch
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %4 = landingpad { i8*, i32 }
           catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   store i8* %5, i8** %exn.slot
@@ -124,7 +124,7 @@ declare void @llvm.eh.endcatch() #1
 ; CHECK: call void (...) @llvm.frameescape

 ; Function Attrs: nounwind uwtable
-define void @"\01?test2@@YAXXZ"() #0 {
+define void @"\01?test2@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %tmp = alloca i32, align 4
   %exn.slot = alloca i8*
@@ -135,7 +135,7 @@ entry:
     to label %unreachable unwind label %lpad

 lpad: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %1 = landingpad { i8*, i32 }
           catch i8* null
   %2 = extractvalue { i8*, i32 } %1, 0
   store i8* %2, i8** %exn.slot
@@ -150,7 +150,7 @@ catch: ; preds = %lpad
     to label %unreachable unwind label %lpad1

 lpad1: ; preds = %catch
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %4 = landingpad { i8*, i32 }
           catch i8* null
   %5 = extractvalue { i8*, i32 } %4, 0
   store i8* %5, i8** %exn.slot
diff --git a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
index 15f6bfb4680d..83236c4188ff 100644
--- a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
+++ b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
@@ -72,7 +72,7 @@ $"\01??_R0H@8" = comdat any
 ; CHECK: br label %for.body

 ; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %e = alloca i32, align 4
   %ExceptionVal = alloca [10 x i32], align 16
@@ -112,13 +112,13 @@ invoke.cont: ; preds = %for.body
   br label %try.cont

 ; CHECK: [[LPAD_LABEL:lpad[0-9]*]]:{{[ ]+}}; preds = %for.body
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
 ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %[[SPLIT_RECOVER_BB:.*]]]

 lpad: ; preds = %for.body
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
   %3 = extractvalue { i8*, i32 } %2, 1
   %4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
index f395d64c7b5e..31b5e58562b2 100644
--- a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
@@ -18,13 +18,13 @@ declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
 declare void @llvm.eh.endcatch() #2

 ; Function Attrs: nounwind uwtable
-define void @test_catch_all() #0 {
+define void @test_catch_all() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   invoke void @may_throw()
     to label %try.cont unwind label %lpad

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   tail call void @llvm.eh.begincatch(i8* %1, i8* null) #2
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
index 6383ca7f1883..fc632af17405 100644
--- a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
@@ -43,7 +43,7 @@ $"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = comdat any
 declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)

 ; Function Attrs: uwtable
-define i32 @main() #1 {
+define i32 @main() #1 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %tmp.i = alloca i32, align 4
   %e = alloca i32, align 4
@@ -57,7 +57,7 @@ entry:
   unreachable

 lpad1: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %1 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
   %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @main.catch)
   indirectbr i8* %recover, [label %try.cont.split]
@@ -90,7 +90,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #3
 ; Function Attrs: nounwind
 declare i8* @llvm.eh.actions(...) #3

-define internal i8* @main.catch(i8*, i8*) #5 {
+define internal i8* @main.catch(i8*, i8*) #5 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %e.i8 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
   %e = bitcast i8* %e.i8 to i32*
@@ -104,7 +104,7 @@ entry.split: ; preds = %entry
   ret i8* blockaddress(@main, %try.cont.split)

 stub: ; preds = %entry
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %4 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
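The outlined @main.catch above shows the other half of the design: handler bodies become separate functions that take the exception pointer and the parent's frame pointer, and they reach the parent's locals through llvm.framerecover against slots the parent published with llvm.frameescape (3.7-era intrinsics, later renamed localescape/localrecover). A stripped-down sketch of that contract, with placeholder names @parent and @parent.catch:

define void @parent() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:
  %slot = alloca i32
  ; publish %slot as frame-slot index 0 for outlined handlers
  call void (...) @llvm.frameescape(i32* %slot)
  ret void
}

define internal i8* @parent.catch(i8*, i8*) {
entry:
  ; %1 is the parent frame pointer handed to the outlined handler;
  ; recover slot 0 of @parent's frame and write through it
  %slot.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*), i8* %1, i32 0)
  %slot = bitcast i8* %slot.i8 to i32*
  store i32 42, i32* %slot
  ret i8* null
}

declare void @llvm.frameescape(...)
declare i8* @llvm.framerecover(i8*, i8*, i32)
declare i32 @__CxxFrameHandler3(...)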
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch.ll b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
index e7aaca86a882..c7a829ad7e42 100644
--- a/test/CodeGen/WinEH/cppeh-prepared-catch.ll
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
@@ -30,7 +30,7 @@ $"\01??_R0H@8" = comdat any
 @"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
 @llvm.eh.handlertype.H.8 = private unnamed_addr constant %eh.CatchHandlerType { i32 8, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"

-define internal i8* @"\01?f@@YAXXZ.catch"(i8*, i8*) #4 {
+define internal i8* @"\01?f@@YAXXZ.catch"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 0)
   %bc2 = bitcast i8* %.i8 to i32**
@@ -42,7 +42,7 @@ invoke.cont2: ; preds = %entry
   ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont)

 lpad1: ; preds = %entry
-  %lp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %lp4 = landingpad { i8*, i32 }
           cleanup
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
   %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
@@ -56,7 +56,7 @@ lpad1: ; preds = %entry
 ; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL

-define internal i8* @"\01?f@@YAXXZ.catch1"(i8*, i8*) #4 {
+define internal i8* @"\01?f@@YAXXZ.catch1"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 1)
   %2 = bitcast i8* %.i8 to double*
@@ -68,7 +68,7 @@ done:
   ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont8)

 lpad: ; preds = %entry
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %4 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
@@ -82,7 +82,7 @@ lpad: ; preds = %entry
 ; CHECK: .seh_handlerdata
 ; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL

-define void @"\01?f@@YAXXZ"() #0 {
+define void @"\01?f@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -96,7 +96,7 @@ invoke.cont: ; preds = %entry
   br label %try.cont

 lpad2: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.8
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
   %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.8 to i8*), i32 0, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
@@ -107,7 +107,7 @@ try.cont: ; preds = %lpad2, %invoke.cont
     to label %try.cont8 unwind label %lpad1

 lpad1:
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %3 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
   %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
   indirectbr i8* %recover2, [label %try.cont8]
diff --git a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
index 876cb53baba1..14973023356a 100644
--- a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
+++ b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
@@ -50,7 +50,7 @@ $_TI1H = comdat any
 ; CHECK-NEXT: .long .Ltmp0@IMGREL
 ; CHECK-NEXT: .long 0

-define void @"\01?test1@@YAXXZ"() #0 {
+define void @"\01?test1@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %unwindhelp = alloca i64
   %tmp = alloca i32, align 4
@@ -66,7 +66,7 @@ entry:
     to label %unreachable unwind label %lpad1

 lpad1: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test1@@YAXXZ.cleanup")
   indirectbr i8* %recover, []
@@ -118,7 +118,7 @@ entry:
 ; CHECK-NEXT: .long .Ltmp12@IMGREL
 ; CHECK-NEXT: .long 0

-define void @"\01?test2@@YAX_N@Z"(i1 zeroext %b) #2 {
+define void @"\01?test2@@YAX_N@Z"(i1 zeroext %b) #2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
   %b.addr = alloca i8, align 1
   %s = alloca %struct.S, align 1
   %exn.slot = alloca i8*
@@ -145,13 +145,13 @@ invoke.cont3: ; preds = %if.then
   br label %if.end

 lpad1: ; preds = %entry, %if.end
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
   indirectbr i8* %recover, []

 lpad3: ; preds = %if.then
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %3 = landingpad { i8*, i32 }
           cleanup
   %recover4 = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup1", i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
   indirectbr i8* %recover4, []
@@ -196,7 +196,7 @@ declare i8* @llvm.framerecover(i8*, i8*, i32) #6
 ; Function Attrs: nounwind
 declare void @llvm.eh.unwindhelp(i8*) #4

-define internal void @"\01?test2@@YAX_N@Z.cleanup"(i8*, i8*) #7 {
+define internal void @"\01?test2@@YAX_N@Z.cleanup"(i8*, i8*) #7 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %s.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 0)
   %s = bitcast i8* %s.i8 to %struct.S*
@@ -208,12 +208,12 @@ entry.split: ; preds = %entry
   ret void

 stub: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   unreachable
 }

-define internal void @"\01?test2@@YAX_N@Z.cleanup1"(i8*, i8*) #7 {
+define internal void @"\01?test2@@YAX_N@Z.cleanup1"(i8*, i8*) #7 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %s1.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 1)
   %s1 = bitcast i8* %s1.i8 to %struct.S*
@@ -225,7 +225,7 @@ entry.split: ; preds = %entry
   ret void

 stub: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   unreachable
 }
diff --git a/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll b/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
index dd99a092b201..678ea6f8ba13 100644
--- a/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
+++ b/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
@@ -34,7 +34,7 @@ $"\01??_R0H@8" = comdat any
 ; CHECK: invoke void @"\01?g@@YAXXZ"()

 ; Function Attrs: nounwind
-define void @"\01?f@@YAXXZ"() #0 {
+define void @"\01?f@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   invoke void @"\01?g@@YAXXZ"()
     to label %invoke.cont unwind label %lpad
@@ -48,7 +48,7 @@ invoke.cont: ; preds = %entry
     to label %unreachable unwind label %lpad1

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   br label %catch2
@@ -56,14 +56,14 @@ lpad: ; preds = %entry
 ; Note: Even though this landing pad has two catch clauses, it only has one action because both
 ; handlers do the same thing.
 ; CHECK: [[LPAD1_LABEL]]:
-; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NEXT: catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
 ; CHECK-NEXT: catch i8* null
 ; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch")
 ; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont4]

 lpad1: ; preds = %invoke.cont
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
   %3 = extractvalue { i8*, i32 } %2, 0
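The note in cppeh-shared-empty-catch.ll is worth pausing on: when several catch clauses funnel into identical handler bodies, preparation evidently emits a single action for all of them, and a null typeinfo together with what looks like a -1 slot index reads as "catch anything, no catch object". A sketch of that action list (@shared.catch is a placeholder name):

  ; one shared catch-all action covering both clauses
  %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @shared.catch)
  indirectbr i8* %recover, [label %try.cont]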
diff --git a/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll b/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
index 81ee4542062d..5b974508bc11 100644
--- a/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
+++ b/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
@@ -91,7 +91,7 @@ $"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = comdat any
 ; CHECK: }

 ; Function Attrs: uwtable
-define i32 @main() #0 {
+define i32 @main() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %retval = alloca i32, align 4
   %tmp = alloca i8, align 1
@@ -111,7 +111,7 @@ entry:
     to label %unreachable unwind label %lpad

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %0 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
@@ -146,7 +146,7 @@ try.cont: ; preds = %invoke.cont
     to label %unreachable unwind label %lpad4

 lpad2: ; preds = %catch
-  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %6 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
   %7 = extractvalue { i8*, i32 } %6, 0
@@ -157,7 +157,7 @@ lpad2: ; preds = %catch
   br label %catch.dispatch5

 lpad4: ; preds = %try.cont
-  %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %9 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
   %10 = extractvalue { i8*, i32 } %9, 0
@@ -200,7 +200,7 @@ invoke.cont11: ; preds = %catch8
   br label %try.cont19

 lpad10: ; preds = %catch8
-  %15 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %15 = landingpad { i8*, i32 }
           cleanup
   %16 = extractvalue { i8*, i32 } %15, 0
   store i8* %16, i8** %exn.slot
@@ -210,7 +210,7 @@ lpad10: ; preds = %catch8
   br label %eh.resume

 lpad16: ; preds = %catch13
-  %18 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %18 = landingpad { i8*, i32 }
           cleanup
   %19 = extractvalue { i8*, i32 } %18, 0
   store i8* %19, i8** %exn.slot
@@ -220,7 +220,7 @@ lpad16: ; preds = %catch13
   br label %eh.resume

 lpad21: ; preds = %try.cont19
-  %21 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %21 = landingpad { i8*, i32 }
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
           catch i8* null
@@ -255,7 +255,7 @@ try.cont33: ; preds = %invoke.cont31
     to label %unreachable unwind label %lpad35

 lpad30: ; preds = %catch25
-  %27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %27 = landingpad { i8*, i32 }
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
           catch i8* null
@@ -267,7 +267,7 @@ lpad30: ; preds = %catch25
   br label %catch.dispatch36

 lpad35: ; preds = %try.cont33
-  %30 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %30 = landingpad { i8*, i32 }
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
           catch i8* null
@@ -326,7 +326,7 @@ invoke.cont43: ; preds = %catch40
   br label %try.cont60

 lpad42: ; preds = %catch40
-  %38 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %38 = landingpad { i8*, i32 }
           cleanup
   %39 = extractvalue { i8*, i32 } %38, 0
   store i8* %39, i8** %exn.slot
@@ -336,7 +336,7 @@ lpad42: ; preds = %catch40
   br label %eh.resume

 lpad50: ; preds = %catch45
-  %41 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %41 = landingpad { i8*, i32 }
           cleanup
   %42 = extractvalue { i8*, i32 } %41, 0
   store i8* %42, i8** %exn.slot
@@ -346,7 +346,7 @@ lpad50: ; preds = %catch45
   br label %eh.resume

 lpad57: ; preds = %catch53
-  %44 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %44 = landingpad { i8*, i32 }
           cleanup
   %45 = extractvalue { i8*, i32 } %44, 0
   store i8* %45, i8** %exn.slot
diff --git a/test/CodeGen/WinEH/cppeh-state-calc-1.ll b/test/CodeGen/WinEH/cppeh-state-calc-1.ll
index 3549b1d51dee..1e71f8f38271 100644
--- a/test/CodeGen/WinEH/cppeh-state-calc-1.ll
+++ b/test/CodeGen/WinEH/cppeh-state-calc-1.ll
@@ -68,7 +68,7 @@ $_TI1D = comdat any
 @_TI1D = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1D to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat

 ; Function Attrs: nounwind uwtable
-define void @"\01?test@@YAXXZ"() #0 {
+define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %tmp = alloca i32, align 4
   %x = alloca i32, align 4
@@ -84,7 +84,7 @@ entry:
     to label %unreachable unwind label %lpad

 lpad: ; preds = %entry
-  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %1 = landingpad { i8*, i32 }
           catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
@@ -99,7 +99,7 @@ try.cont: ; preds = %lpad
     to label %unreachable unwind label %lpad3

 lpad3: ; preds = %try.cont
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
@@ -114,7 +114,7 @@ try.cont10: ; preds = %lpad3, %lpad
     to label %unreachable unwind label %lpad12

 lpad12: ; preds = %try.cont10
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %4 = landingpad { i8*, i32 }
           catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
           catch i8* null
   %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
@@ -164,7 +164,7 @@ declare void @"\01?catch_one@@YAXXZ"() #1
 ; Function Attrs: nounwind
 declare i8* @llvm.eh.actions(...) #3

-define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) #4 {
+define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %x.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
   %x = bitcast i8* %x.i8 to i32*
@@ -177,7 +177,7 @@ entry.split: ; preds = %entry
   ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)

 stub: ; preds = %entry
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %3 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
@@ -186,7 +186,7 @@ stub: ; preds = %entry
 ; Function Attrs: nounwind readnone
 declare void @llvm.donothing() #2

-define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) #4 {
+define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   call void @"\01?catch_a@@YAXXZ"() #3
   invoke void @llvm.donothing()
@@ -196,13 +196,13 @@ entry.split: ; preds = %entry
   ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)

 stub: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
 }

-define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*) #4 {
+define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   %x21.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
   %x21 = bitcast i8* %x21.i8 to i32*
@@ -215,13 +215,13 @@ entry.split: ; preds = %entry
   ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)

 stub: ; preds = %entry
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %3 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
 }

-define internal i8* @"\01?test@@YAXXZ.catch3"(i8*, i8*) #4 {
+define internal i8* @"\01?test@@YAXXZ.catch3"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
   call void @"\01?catch_all@@YAXXZ"() #3
   invoke void @llvm.donothing()
@@ -231,7 +231,7 @@ entry.split: ; preds = %entry
   ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)

 stub: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   %recover = call i8* (...) @llvm.eh.actions()
   unreachable
diff --git a/test/CodeGen/WinEH/seh-catch-all.ll b/test/CodeGen/WinEH/seh-catch-all.ll
index c2a652b80990..5ac2295a5b41 100644
--- a/test/CodeGen/WinEH/seh-catch-all.ll
+++ b/test/CodeGen/WinEH/seh-catch-all.ll
@@ -21,7 +21,7 @@ declare i32 @__C_specific_handler(...)
 declare i8* @llvm.frameaddress(i32)

 ; Function Attrs: uwtable
-define void @seh_catch_all() {
+define void @seh_catch_all() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
 entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -32,7 +32,7 @@ invoke.cont: ; preds = %entry
   br label %__try.cont

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %0 = landingpad { i8*, i32 }
           catch i8* null
   %1 = extractvalue { i8*, i32 } %0, 0
   store i8* %1, i8** %exn.slot
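From here the tests switch to SEH, where the personality is __C_specific_handler and filter functions rather than RTTI select the handler; llvm.eh.typeid.for compares the landing pad's selector against a filter. A compact, self-contained sketch in the new syntax (the helpers @might_crash and @filt are assumed, mirroring the tests below):

define i32 @sketch() personality i32 (...)* @__C_specific_handler {
entry:
  invoke void @might_crash()
          to label %return unwind label %lpad

lpad:
  %ehvals = landingpad { i8*, i32 }
          catch i32 ()* @filt
  %sel = extractvalue { i8*, i32 } %ehvals, 1
  ; does the selector match our __except filter?
  %filt_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
  %matches = icmp eq i32 %sel, %filt_sel
  br i1 %matches, label %__except, label %eh.resume

__except:
  ret i32 1

return:
  ret i32 0

eh.resume:
  resume { i8*, i32 } %ehvals
}

declare void @might_crash()
declare i32 @filt()
declare i32 @__C_specific_handler(...)
declare i32 @llvm.eh.typeid.for(i8*)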
diff --git a/test/CodeGen/WinEH/seh-inlined-finally.ll b/test/CodeGen/WinEH/seh-inlined-finally.ll
index d2080cff79d4..5943cb77cee2 100644
--- a/test/CodeGen/WinEH/seh-inlined-finally.ll
+++ b/test/CodeGen/WinEH/seh-inlined-finally.ll
@@ -19,7 +19,7 @@ declare void @llvm.frameescape(...)
 declare dllimport void @EnterCriticalSection(%struct._RTL_CRITICAL_SECTION*)
 declare dllimport void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION*)

-define void @use_finally() {
+define void @use_finally() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
 entry:
   invoke void @may_crash()
     to label %invoke.cont unwind label %lpad
@@ -29,7 +29,7 @@ invoke.cont: ; preds = %entry
   ret void

 lpad: ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %0 = landingpad { i8*, i32 }
           cleanup
   %call.i2 = tail call i32 @puts(i8* null)
   resume { i8*, i32 } %0
@@ -44,7 +44,7 @@ lpad: ; preds = %entry
 ; CHECK-NEXT: indirectbr i8* %recover, []

 ; Function Attrs: nounwind uwtable
-define i32 @call_may_crash_locked() {
+define i32 @call_may_crash_locked() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
 entry:
   %p = alloca %struct._RTL_CRITICAL_SECTION, align 8
   call void (...) @llvm.frameescape(%struct._RTL_CRITICAL_SECTION* %p)
@@ -60,7 +60,7 @@ invoke.cont: ; preds = %entry
   ret i32 42

 lpad: ; preds = %entry
-  %tmp7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %tmp7 = landingpad { i8*, i32 }
           cleanup
   %tmp8 = call i8* @llvm.frameaddress(i32 0)
   %tmp9 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %tmp8, i32 0)
diff --git a/test/CodeGen/WinEH/seh-outlined-finally.ll b/test/CodeGen/WinEH/seh-outlined-finally.ll
index 19558b705308..3c27212192dd 100644
--- a/test/CodeGen/WinEH/seh-outlined-finally.ll
+++ b/test/CodeGen/WinEH/seh-outlined-finally.ll
@@ -39,7 +39,7 @@ entry:
 }

 ; Function Attrs: uwtable
-define i32 @main() #1 {
+define i32 @main() #1 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
 entry:
   %myres = alloca i32, align 4
   %exn.slot = alloca i8*
@@ -59,7 +59,7 @@ invoke.cont2: ; preds = %invoke.cont
   ret i32 0

 lpad: ; preds = %entry
-  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %2 = landingpad { i8*, i32 }
           cleanup
   %3 = extractvalue { i8*, i32 } %2, 0
   store i8* %3, i8** %exn.slot
@@ -70,7 +70,7 @@ lpad: ; preds = %entry
     to label %invoke.cont3 unwind label %lpad1

 lpad1: ; preds = %lpad, %invoke.cont
-  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %6 = landingpad { i8*, i32 }
           cleanup
   %7 = extractvalue { i8*, i32 } %6, 0
   store i8* %7, i8** %exn.slot
diff --git a/test/CodeGen/WinEH/seh-prepared-basic.ll b/test/CodeGen/WinEH/seh-prepared-basic.ll
index 880bb3c33a8d..b981dc2d9bd8 100644
--- a/test/CodeGen/WinEH/seh-prepared-basic.ll
+++ b/test/CodeGen/WinEH/seh-prepared-basic.ll
@@ -15,14 +15,14 @@ target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"

 ; Function Attrs: uwtable
-define void @do_except() #0 {
+define void @do_except() #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
 entry:
   call void (...) @llvm.frameescape()
   invoke void @g() #5
     to label %__try.cont unwind label %lpad1

 lpad1: ; preds = %entry
-  %ehvals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+  %ehvals = landingpad { i8*, i32 }
           catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*)
   %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*), i32 -1, i8* blockaddress(@do_except, %__try.cont))
   indirectbr i8* %recover, [label %__try.cont]
diff --git a/test/CodeGen/WinEH/seh-resume-phi.ll b/test/CodeGen/WinEH/seh-resume-phi.ll
index 256dd852d287..d2bd64167d22 100644
--- a/test/CodeGen/WinEH/seh-resume-phi.ll
+++ b/test/CodeGen/WinEH/seh-resume-phi.ll
@@ -9,13 +9,13 @@ declare void @cleanup()
 declare i32 @__C_specific_handler(...)
 declare i32 @llvm.eh.typeid.for(i8*)

-define void @resume_phi() {
+define void @resume_phi() personality i32 (...)* @__C_specific_handler {
 entry:
   invoke void @might_crash(i8* null)
     to label %return unwind label %lpad1

 lpad1:
-  %ehvals1 = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals1 = landingpad { i8*, i32 }
           catch i32 ()* @filt
   %ehptr1 = extractvalue { i8*, i32 } %ehvals1, 0
   %ehsel1 = extractvalue { i8*, i32 } %ehvals1, 1
@@ -28,7 +28,7 @@ __except:
     to label %return unwind label %lpad2

 lpad2:
-  %ehvals2 = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals2 = landingpad { i8*, i32 }
           cleanup
   %ehptr2 = extractvalue { i8*, i32 } %ehvals2, 0
   %ehsel2 = extractvalue { i8*, i32 } %ehvals2, 1
diff --git a/test/CodeGen/WinEH/seh-simple.ll b/test/CodeGen/WinEH/seh-simple.ll
index 9a451874d587..98f06ef12c9f 100644
--- a/test/CodeGen/WinEH/seh-simple.ll
+++ b/test/CodeGen/WinEH/seh-simple.ll
@@ -12,7 +12,7 @@ declare void @might_crash()
 declare i32 @__C_specific_handler(...)
 declare i32 @llvm.eh.typeid.for(i8*)

-define i32 @simple_except_store() {
+define i32 @simple_except_store() personality i32 (...)* @__C_specific_handler {
 entry:
   %retval = alloca i32
   store i32 0, i32* %retval
@@ -20,7 +20,7 @@ entry:
     to label %return unwind label %lpad

 lpad:
-  %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals = landingpad { i8*, i32 }
           catch i32 ()* @filt
   %sel = extractvalue { i8*, i32 } %ehvals, 1
   %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
@@ -45,7 +45,7 @@ eh.resume:
 ; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@simple_except_store, %__except))
 ; CHECK-NEXT: indirectbr {{.*}} [label %__except]

-define i32 @catch_all() {
+define i32 @catch_all() personality i32 (...)* @__C_specific_handler {
 entry:
   %retval = alloca i32
   store i32 0, i32* %retval
@@ -53,7 +53,7 @@ entry:
     to label %return unwind label %lpad

 lpad:
-  %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals = landingpad { i8*, i32 }
           catch i8* null
   store i32 1, i32* %retval
   br label %return
@@ -73,13 +73,13 @@ return:
 ; CHECK: store i32 1, i32* %retval

-define i32 @except_phi() {
+define i32 @except_phi() personality i32 (...)* @__C_specific_handler {
 entry:
   invoke void @might_crash()
     to label %return unwind label %lpad

 lpad:
-  %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals = landingpad { i8*, i32 }
           catch i32 ()* @filt
   %sel = extractvalue { i8*, i32 } %ehvals, 1
   %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
@@ -107,7 +107,7 @@ eh.resume:
 ; CHECK-NEXT: %r = phi i32 [ 0, %entry ], [ 1, %lpad.return_crit_edge ]
 ; CHECK-NEXT: ret i32 %r

-define i32 @lpad_phi() {
+define i32 @lpad_phi() personality i32 (...)* @__C_specific_handler {
 entry:
   invoke void @might_crash()
     to label %cont unwind label %lpad
@@ -118,7 +118,7 @@ cont:

 lpad:
   %ncalls.1 = phi i32 [ 0, %entry ], [ 1, %cont ]
-  %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals = landingpad { i8*, i32 }
           catch i32 ()* @filt
   %sel = extractvalue { i8*, i32 } %ehvals, 1
   %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
@@ -153,13 +153,13 @@ eh.resume:
 ; CHECK-NEXT: %r = phi i32 [ 2, %cont ], [ %{{.*}}, %lpad.return_crit_edge ]
 ; CHECK-NEXT: ret i32 %r

-define i32 @cleanup_and_except() {
+define i32 @cleanup_and_except() personality i32 (...)* @__C_specific_handler {
 entry:
   invoke void @might_crash()
     to label %return unwind label %lpad

 lpad:
-  %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+  %ehvals = landingpad { i8*, i32 }
           cleanup
           catch i32 ()* @filt
   call void @cleanup()
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index b99c58c6e4af..f177a35273a3 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -12,13 +12,13 @@
 @error = external global i8

-define void @_ada_x() {
+define void @_ada_x() personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*) {
 entry:
   invoke void @raise()
     to label %eh_then unwind label %unwind

 unwind: ; preds = %entry
-  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
+  %eh_ptr = landingpad { i8*, i32 }
           catch i8* @error
   %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
   %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index d1cfb447a2c3..3d3851cbd4c2 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -13,7 +13,7 @@
 @.str33 = external constant [29 x i32] ; <[29 x i32]*> [#uses=1]
 @.str89 = external constant [5 x i32] ; <[5 x i32]*> [#uses=1]

-define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalias sret %agg.result, %struct.wxDateTime* %this, i32* %format, %"struct.wxDateTime::TimeZone"* %tz, i1 %foo) {
+define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalias sret %agg.result, %struct.wxDateTime* %this, i32* %format, %"struct.wxDateTime::TimeZone"* %tz, i1 %foo) personality i32 (...)* @__gxx_personality_v0 {
 entry:
   br i1 %foo, label %bb116.i, label %bb115.critedge.i
 bb115.critedge.i: ; preds = %entry
@@ -151,11 +151,11 @@ bb7819: ; preds = %bb3314
 bb7834: ; preds = %bb7806, %invcont5831
   br label %bb3261
 lpad: ; preds = %bb7806, %bb5968, %invcont5814, %bb440.i8663, %bb155.i8541, %bb5657, %bb3306
-  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+  %exn = landingpad {i8*, i32}
           cleanup
   ret void
 lpad8185: ; preds = %invcont5831
-  %exn8185 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+  %exn8185 = landingpad {i8*, i32}
           cleanup
   ret void
 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index fc7ddf0bc67a..7ddedacbabd9 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -6,7 +6,7 @@
 declare i8* @_Znwm(i32)
 declare i8* @__cxa_begin_catch(i8*) nounwind

-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) personality i32 (...)* @__gxx_personality_v0 {
 entry:
   br i1 false, label %bb37, label %bb34
@@ -21,7 +21,7 @@ tmp12.i.i.i.i.i.noexc65: ; preds = %bb37
   unreachable

 lpad243: ; preds = %bb37
-  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+  %exn = landingpad {i8*, i32}
           cleanup
   %eh_ptr244 = extractvalue { i8*, i32 } %exn, 0
   store i32 (...)** getelementptr ([5 x i32 (...)*], [5 x i32 (...)*]* @_ZTVN10Evaluation10GridOutputILi3EEE, i32 0, i32 2), i32 (...)*** null, align 8
a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll +++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll @@ -6,7 +6,7 @@ declare i32 @f() declare i32 @g() -define i32 @phi() { +define i32 @phi() personality i32 (...)* @__gxx_personality_v0 { entry: %a = call i32 @f() ; <i32> [#uses=1] %b = invoke i32 @g() @@ -24,7 +24,7 @@ cont2: ; preds = %cont lpad: ; preds = %cont, %entry %y = phi i32 [ %a, %entry ], [ %aa, %cont ] ; <i32> [#uses=1] - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup ret i32 %y } diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll index f8c7a151b2c9..6814ed1d894e 100644 --- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll +++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll @@ -3,7 +3,7 @@ declare i32 @f() -define i32 @phi(i32 %x) { +define i32 @phi(i32 %x) personality i32 (...)* @__gxx_personality_v0 { entry: %a = invoke i32 @f() to label %cont unwind label %lpad ; <i32> [#uses=1] @@ -17,7 +17,7 @@ cont2: ; preds = %cont lpad: ; preds = %cont, %entry %v = phi i32 [ %x, %entry ], [ %a, %cont ] ; <i32> [#uses=1] - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup ret i32 %v } diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll index 2ec49f486c99..aa88576c148e 100644 --- a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll +++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll @@ -9,7 +9,7 @@ %struct.ComplexType = type { i32 } -define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp { +define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp personality i32 (...)* @__gxx_personality_v0 { entry: ; CHECK: _t: ; CHECK: movl 16(%rbp), @@ -34,7 +34,7 @@ invcont2: ; preds = %invcont1 ret i32 0 lpad: ; preds = %invcont1, %invcont, %entry - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0] unreachable diff --git a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll index 0bf13de61275..2f4e11e54e35 100644 --- a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll +++ b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll @@ -20,7 +20,7 @@ declare void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__n declare i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream*, %struct.ASN1Object**, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)**) -define i32 @_ZN8ASN1Unit4loadER8xmstreamjm18ASN1LengthEncoding(%struct.ASN1Unit* %this, %struct.xmstream* nocapture %stream, i32 %numObjects, i64 %size, i32 %lEncoding) { +define i32 @_ZN8ASN1Unit4loadER8xmstreamjm18ASN1LengthEncoding(%struct.ASN1Unit* %this, %struct.xmstream* nocapture %stream, i32 %numObjects, i64 %size, i32 %lEncoding) personality i32 (...)* @__gxx_personality_v0 { entry: br label %meshBB85 @@ -46,7 +46,7 @@ bb1.i5: ; preds = %bb.i1 lpad: ; preds = %bb1.i.fragment.cl, %bb1.i.fragment, %bb5 %.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; <i8*> [#uses=1] - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup %1 = load i8, i8* 
%.SV10.phi807, align 8 ; <i8> [#uses=0] br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp diff --git a/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll index 2ba4d9aaded8..41c318b62eab 100644 --- a/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll +++ b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll @@ -7,7 +7,7 @@ target triple = "i386-apple-darwin10.0" declare i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(i8*, i8* nocapture, i8* nocapture) ssp align 2 -define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(i8* %psb, i8* %idraw, i32 %drawflags) ssp align 2 { +define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(i8* %psb, i8* %idraw, i32 %drawflags) ssp align 2 personality i32 (...)* @__gxx_personality_v0 { entry: br i1 undef, label %bb92, label %bb58 @@ -60,7 +60,7 @@ bb92: ; preds = %entry unreachable lpad159: ; preds = %bb58 - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup unreachable } diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll index 4711d5274675..fc5520e12ac0 100644 --- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll +++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll @@ -13,7 +13,7 @@ target triple = "i386-apple-darwin10.0.0" ; CHECK: movl %esi,{{.*}}(%ebp) ; CHECK: calll __Z6throwsv -define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp { +define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %retval = alloca i8*, align 4 ; <i8**> [#uses=2] %n.addr = alloca i32, align 4 ; <i32*> [#uses=1] @@ -30,13 +30,13 @@ invoke.cont: ; preds = %entry br label %finally terminate.handler: ; preds = %match.end - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } cleanup call void @_ZSt9terminatev() noreturn nounwind unreachable try.handler: ; preds = %entry - %exc1.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %exc1.ptr = landingpad { i8*, i32 } catch i8* null %exc1 = extractvalue { i8*, i32 } %exc1.ptr, 0 %selector = extractvalue { i8*, i32 } %exc1.ptr, 1 @@ -57,7 +57,7 @@ invoke.cont2: ; preds = %match br label %match.end match.handler: ; preds = %match - %exc3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %exc3 = landingpad { i8*, i32 } cleanup %7 = extractvalue { i8*, i32 } %exc3, 0 store i8* %7, i8** %_rethrow diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll index 61f527b0470c..e97615a417ad 100644 --- a/test/CodeGen/X86/2010-08-04-MingWCrash.ll +++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i386-pc-mingw32 -define void @func() nounwind { +define void @func() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { invoke.cont: %call = tail call i8* @malloc() %a = invoke i32 @bar() @@ -10,7 +10,7 @@ bb1: ret void lpad: - %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %exn.ptr = landingpad { i8*, i32 } catch i8* null %exn = extractvalue { i8*, i32 } %exn.ptr, 0 %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1 diff --git 
a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll index 0183e107460e..4d49b3af88ee 100644 --- a/test/CodeGen/X86/2011-12-15-vec_shift.ll +++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll @@ -12,8 +12,8 @@ define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind { ; Make sure we're masking and pcmp'ing the VSELECT condition vector. ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]] - ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]] - ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]] + ; CHECK-WO-SSE4: pxor [[REG2:%xmm.]], [[REG2:%xmm.]] + ; CHECK-WO-SSE4: pcmpgtb {{%xmm., }}[[REG2]] %1 = shl <16 x i8> %a, %b ret <16 x i8> %1 } diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll index 21443441c9f3..20615afdfa17 100644 --- a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll +++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll @@ -16,7 +16,7 @@ target triple = "i386-apple-macosx10.7" declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp { +define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { bb: br i1 undef, label %bb6, label %bb7 @@ -43,7 +43,7 @@ bb11: ; preds = %bb7 bb20: ; preds = %bb43, %bb41, %bb29, %bb7 %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ] - %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %tmp22 = landingpad { i8*, i32 } catch i8* bitcast ({ i8*, i8* }* @Exception to i8*) br i1 undef, label %bb23, label %bb69 diff --git a/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll index 837fbc0777f7..a3f68fa4c223 100644 --- a/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll +++ b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll @@ -7,7 +7,7 @@ target triple = "i386-pc-linux-gnu" -define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { +define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { invoke void @_ZNK4llvm13CodeGenTarget12getAsmParserEv() to label %1 unwind label %5 @@ -16,7 +16,7 @@ define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { to label %4 unwind label %2 ; <label>:2 ; preds = %1 - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %3 = landingpad { i8*, i32 } cleanup unreachable @@ -25,12 +25,12 @@ define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { to label %12 unwind label %7 ; <label>:5 ; preds = %0 - %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %6 = landingpad { i8*, i32 } cleanup br label %33 ; <label>:7 ; preds = %4 - %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %8 = landingpad { i8*, i32 } cleanup br label %9 @@ -52,7 +52,7 @@ define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { br i1 %15, label %20, label %18 ; <label>:16 ; preds = %12 - %17 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %17 = landingpad { i8*, i32 } cleanup br label %26 @@ -67,7 +67,7 @@ define void
@_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { br label %14 ; <label>:21 ; preds = %18 - %22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %22 = landingpad { i8*, i32 } cleanup %23 = extractvalue { i8*, i32 } %22, 1 br i1 undef, label %26, label %24 @@ -88,7 +88,7 @@ define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { br label %9 ; <label>:30 ; preds = %26 - %31 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %31 = landingpad { i8*, i32 } catch i8* null unreachable @@ -100,7 +100,7 @@ define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { unreachable ; <label>:35 ; preds = %9 - %36 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %36 = landingpad { i8*, i32 } catch i8* null unreachable } diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll index ed1daadf6297..c16deeff3d99 100644 --- a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll +++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll @@ -3,6 +3,7 @@ ; CHECK: merge_stores_can ; CHECK: callq foo ; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: movl 36(%rsp), %ebp ; CHECK-NEXT: movups %xmm0 ; CHECK: callq foo ; CHECK: ret diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll index 818c5ed56873..22227faab942 100644 --- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll @@ -99,7 +99,7 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) %"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" } %"class.__gnu_cxx::hashtable" = type { i64, i64, i64, i64, i64, i64 } -define void @main() uwtable ssp { +define void @main() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %X = alloca %"class.__gnu_cxx::hash_map", align 8 br i1 undef, label %cond.true, label %cond.end @@ -117,7 +117,7 @@ exit.i: ; preds = %cond.end unreachable lpad2.i.i.i.i: ; preds = %cond.end - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } cleanup br i1 undef, label %lpad.body.i.i, label %if.then.i.i.i.i.i.i.i.i diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index 275d4213bd2b..c8f249b7529d 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -463,6 +463,67 @@ define void @merge_vec_element_store(<8 x float> %v, float* %ptr) { ; CHECK-NEXT: retq } +; PR21711 - Merge vector stores into wider vector stores. +; These should be merged into 32-byte stores. 
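; (Layout note: in merge_vec_extract_stores below, %idx0..%idx3 are <4 x float>
;  slots 3..6 of %ptr, i.e. byte offsets 48, 64, 80 and 96, one contiguous
;  64-byte region, and the four shuffles are simply the low and high halves of
;  %v1 and %v2. That contiguity is what makes the stores candidates for merging,
;  and the same offsets reappear as 48(%rdi)..96(%rdi) in the CHECK lines
;  further down.)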
+define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) { + %idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3 + %idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4 + %idx2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5 + %idx3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 6 + %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + store <4 x float> %shuffle0, <4 x float>* %idx0, align 16 + store <4 x float> %shuffle1, <4 x float>* %idx1, align 16 + store <4 x float> %shuffle2, <4 x float>* %idx2, align 16 + store <4 x float> %shuffle3, <4 x float>* %idx3, align 16 + ret void + +; CHECK-LABEL: merge_vec_extract_stores +; CHECK: vmovaps %xmm0, 48(%rdi) +; CHECK-NEXT: vextractf128 $1, %ymm0, 64(%rdi) +; CHECK-NEXT: vmovaps %xmm1, 80(%rdi) +; CHECK-NEXT: vextractf128 $1, %ymm1, 96(%rdi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +} + +; Merging vector stores when sourced from vector loads is not currently handled. +define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) { + %load_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0 + %load_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1 + %v0 = load <4 x float>, <4 x float>* %load_idx0 + %v1 = load <4 x float>, <4 x float>* %load_idx1 + %store_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 0 + %store_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1 + store <4 x float> %v0, <4 x float>* %store_idx0, align 16 + store <4 x float> %v1, <4 x float>* %store_idx1, align 16 + ret void + +; CHECK-LABEL: merge_vec_stores_from_loads +; CHECK: vmovaps +; CHECK-NEXT: vmovaps +; CHECK-NEXT: vmovaps +; CHECK-NEXT: vmovaps +; CHECK-NEXT: retq +} + +; Merging vector stores when sourced from a constant vector is not currently handled. +define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) { + %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3 + %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4 + store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16 + store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16 + ret void + +; CHECK-LABEL: merge_vec_stores_of_constants +; CHECK: vxorps +; CHECK-NEXT: vmovaps +; CHECK-NEXT: vmovaps +; CHECK-NEXT: retq +} + ; This is a minimized test based on real code that was failing. ; We could merge stores (and loads) like this... 
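The asm-label2.ll hunk below shows, in its smallest form, the rewrite applied to every exception-handling test in this commit: the LLVM 3.7-era IR change that moves the personality function off the landingpad instruction and onto the enclosing function definition. A minimal before/after sketch of that shape (@sketch and @may_throw are illustrative names, not from this commit):

; Old form: each landingpad named the personality itself:
;   %exn = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
;           cleanup
; New form: the personality is spelled once, on the function definition:
define i32 @sketch() personality i32 (...)* @__gxx_personality_v0 {
entry:
  %r = invoke i32 @may_throw()
          to label %cont unwind label %lpad
cont:                                             ; preds = %entry
  ret i32 %r
lpad:                                             ; preds = %entry
  %exn = landingpad { i8*, i32 }
          cleanup
  ret i32 -1
}
declare i32 @may_throw()
declare i32 @__gxx_personality_v0(...)

The landingpad clauses (cleanup, catch i8* null, and so on) are unchanged by the migration; only the personality moves, which is why every hunk above pairs one edit on the define line with one edit on the landingpad line.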
diff --git a/test/CodeGen/X86/asm-label2.ll b/test/CodeGen/X86/asm-label2.ll index 8715aa98ba5e..031bd3852e62 100644 --- a/test/CodeGen/X86/asm-label2.ll +++ b/test/CodeGen/X86/asm-label2.ll @@ -7,7 +7,7 @@ ; CHECK: jmp LBB0_1 ; CHECK: LBB0_1: -define void @foobar() { +define void @foobar() personality i32 (...)* @__gxx_personality_v0 { entry: invoke void @_zed() to label %invoke.cont unwind label %lpad @@ -16,7 +16,7 @@ invoke.cont: ; preds = %entry ret void lpad: ; preds = %entry - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup unreachable } diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll index 5d99269ae1dc..b92b78035009 100644 --- a/test/CodeGen/X86/avx2-vector-shifts.ll +++ b/test/CodeGen/X86/avx2-vector-shifts.ll @@ -302,49 +302,17 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; CHECK-LABEL: shl_32i8 -; CHECK: vextracti128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpsllw $4, %xmm3, %xmm2 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; CHECK-NEXT: vpand %xmm8, %xmm2, %xmm5 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm9 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224] -; CHECK-NEXT: vpand %xmm9, %xmm2, %xmm7 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpand %xmm7, %xmm2, %xmm4 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm3, %xmm3 -; CHECK-NEXT: vpsllw $2, %xmm3, %xmm4 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] -; CHECK-NEXT: vpand %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpaddb %xmm7, %xmm7, %xmm7 -; CHECK-NEXT: vpand %xmm7, %xmm2, %xmm6 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6 -; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3 -; CHECK-NEXT: vpaddb %xmm3, %xmm3, %xmm4 -; CHECK-NEXT: vpaddb %xmm7, %xmm7, %xmm6 -; CHECK-NEXT: vpand %xmm6, %xmm2, %xmm6 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6 -; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3 -; CHECK-NEXT: vpsllw $4, %xmm0, %xmm4 -; CHECK-NEXT: vpand %xmm8, %xmm4, %xmm4 -; CHECK-NEXT: vpsllw $5, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm9, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm6 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6 -; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vpsllw $2, %xmm0, %xmm4 -; CHECK-NEXT: vpand %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm5 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm5, %xmm5 -; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm0, %xmm0, %xmm4 -; CHECK-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; CHECK: vpsllw $5, %ymm1, %ymm1 +; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; CHECK-NEXT: vpaddb 
%ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %r, %a ret <32 x i8> %shl @@ -381,169 +349,30 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; CHECK-LABEL: ashr_32i8 -; CHECK: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpextrb $1, %xmm2, %ecx -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpextrb $1, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $0, %xmm2, %ecx -; CHECK-NEXT: vpextrb $0, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movzbl %dl, %edx -; CHECK-NEXT: vpextrb $2, %xmm2, %ecx -; CHECK-NEXT: vpextrb $2, %xmm3, %esi -; CHECK-NEXT: sarb %cl, %sil -; CHECK-NEXT: vmovd %edx, %xmm4 -; CHECK-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: vpextrb $3, %xmm2, %ecx -; CHECK-NEXT: vpextrb $3, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $4, %xmm2, %ecx -; CHECK-NEXT: vpextrb $4, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $5, %xmm2, %ecx -; CHECK-NEXT: vpextrb $5, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $6, %xmm2, %ecx -; CHECK-NEXT: vpextrb $6, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $7, %xmm2, %ecx -; CHECK-NEXT: vpextrb $7, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $8, %xmm2, %ecx -; CHECK-NEXT: vpextrb $8, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $9, %xmm2, %ecx -; CHECK-NEXT: vpextrb $9, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $10, %xmm2, %ecx -; CHECK-NEXT: vpextrb $10, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $11, %xmm2, %ecx -; CHECK-NEXT: vpextrb $11, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $12, %xmm2, %ecx -; CHECK-NEXT: vpextrb $12, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $13, %xmm2, %ecx -; CHECK-NEXT: vpextrb $13, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $14, %xmm2, %ecx -; CHECK-NEXT: vpextrb $14, %xmm3, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $15, %xmm2, %ecx -; CHECK-NEXT: vpextrb $15, %xmm3, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $1, %xmm1, %ecx -; CHECK-NEXT: vpextrb $1, %xmm0, %esi -; CHECK-NEXT: sarb %cl, %sil -; CHECK-NEXT: movzbl %dl, %ecx -; CHECK-NEXT: vpinsrb $14, %ecx, %xmm4, %xmm2 -; CHECK-NEXT: vpextrb $0, %xmm1, %ecx -; CHECK-NEXT: vpextrb $0, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpextrb $2, %xmm1, %ecx -; 
CHECK-NEXT: vpextrb $2, %xmm0, %edi -; CHECK-NEXT: sarb %cl, %dil -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: movzbl %dl, %ecx -; CHECK-NEXT: vmovd %ecx, %xmm3 -; CHECK-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: vpextrb $3, %xmm1, %ecx -; CHECK-NEXT: vpextrb $3, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $4, %xmm1, %ecx -; CHECK-NEXT: vpextrb $4, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $5, %xmm1, %ecx -; CHECK-NEXT: vpextrb $5, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $6, %xmm1, %ecx -; CHECK-NEXT: vpextrb $6, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $7, %xmm1, %ecx -; CHECK-NEXT: vpextrb $7, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $8, %xmm1, %ecx -; CHECK-NEXT: vpextrb $8, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $9, %xmm1, %ecx -; CHECK-NEXT: vpextrb $9, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $10, %xmm1, %ecx -; CHECK-NEXT: vpextrb $10, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $11, %xmm1, %ecx -; CHECK-NEXT: vpextrb $11, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $12, %xmm1, %ecx -; CHECK-NEXT: vpextrb $12, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $13, %xmm1, %ecx -; CHECK-NEXT: vpextrb $13, %xmm0, %eax -; CHECK-NEXT: sarb %cl, %al -; CHECK-NEXT: vpextrb $14, %xmm1, %ecx -; CHECK-NEXT: vpextrb $14, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $15, %xmm1, %ecx -; CHECK-NEXT: vpextrb $15, %xmm0, %edx -; CHECK-NEXT: sarb %cl, %dl -; CHECK-NEXT: vpinsrb $14, %eax, %xmm3, %xmm0 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; CHECK: vpsllw $5, %ymm1, %ymm1 +; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] +; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4 +; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4 +; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpblendvb %ymm2, %ymm4, 
%ymm3, %ymm3 +; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4 +; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 +; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2 +; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] +; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3 +; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3 +; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3 +; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0 +; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: retq %ashr = ashr <32 x i8> %r, %a ret <32 x i8> %ashr @@ -580,169 +409,18 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; CHECK-LABEL: lshr_32i8 -; CHECK: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpextrb $1, %xmm2, %ecx -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpextrb $1, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $0, %xmm2, %ecx -; CHECK-NEXT: vpextrb $0, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movzbl %dl, %edx -; CHECK-NEXT: vpextrb $2, %xmm2, %ecx -; CHECK-NEXT: vpextrb $2, %xmm3, %esi -; CHECK-NEXT: shrb %cl, %sil -; CHECK-NEXT: vmovd %edx, %xmm4 -; CHECK-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: vpextrb $3, %xmm2, %ecx -; CHECK-NEXT: vpextrb $3, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $4, %xmm2, %ecx -; CHECK-NEXT: vpextrb $4, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $5, %xmm2, %ecx -; CHECK-NEXT: vpextrb $5, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $6, %xmm2, %ecx -; CHECK-NEXT: vpextrb $6, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $7, %xmm2, %ecx -; CHECK-NEXT: vpextrb $7, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $8, %xmm2, %ecx -; CHECK-NEXT: vpextrb $8, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $9, %xmm2, %ecx -; CHECK-NEXT: vpextrb $9, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $10, %xmm2, %ecx -; CHECK-NEXT: vpextrb $10, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $11, %xmm2, %ecx -; CHECK-NEXT: vpextrb $11, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; 
CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $12, %xmm2, %ecx -; CHECK-NEXT: vpextrb $12, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $13, %xmm2, %ecx -; CHECK-NEXT: vpextrb $13, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $14, %xmm2, %ecx -; CHECK-NEXT: vpextrb $14, %xmm3, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; CHECK-NEXT: vpextrb $15, %xmm2, %ecx -; CHECK-NEXT: vpextrb $15, %xmm3, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $1, %xmm1, %ecx -; CHECK-NEXT: vpextrb $1, %xmm0, %esi -; CHECK-NEXT: shrb %cl, %sil -; CHECK-NEXT: movzbl %dl, %ecx -; CHECK-NEXT: vpinsrb $14, %ecx, %xmm4, %xmm2 -; CHECK-NEXT: vpextrb $0, %xmm1, %ecx -; CHECK-NEXT: vpextrb $0, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpextrb $2, %xmm1, %ecx -; CHECK-NEXT: vpextrb $2, %xmm0, %edi -; CHECK-NEXT: shrb %cl, %dil -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: movzbl %dl, %ecx -; CHECK-NEXT: vmovd %ecx, %xmm3 -; CHECK-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: vpextrb $3, %xmm1, %ecx -; CHECK-NEXT: vpextrb $3, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $4, %xmm1, %ecx -; CHECK-NEXT: vpextrb $4, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $5, %xmm1, %ecx -; CHECK-NEXT: vpextrb $5, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $6, %xmm1, %ecx -; CHECK-NEXT: vpextrb $6, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $7, %xmm1, %ecx -; CHECK-NEXT: vpextrb $7, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $8, %xmm1, %ecx -; CHECK-NEXT: vpextrb $8, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $9, %xmm1, %ecx -; CHECK-NEXT: vpextrb $9, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $10, %xmm1, %ecx -; CHECK-NEXT: vpextrb $10, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpextrb $11, %xmm1, %ecx -; CHECK-NEXT: vpextrb $11, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $12, %xmm1, %ecx -; CHECK-NEXT: vpextrb $12, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; CHECK-NEXT: vpextrb $13, %xmm1, %ecx -; CHECK-NEXT: vpextrb $13, %xmm0, %eax -; CHECK-NEXT: shrb %cl, %al -; CHECK-NEXT: vpextrb $14, %xmm1, %ecx -; CHECK-NEXT: vpextrb $14, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; CHECK-NEXT: movzbl 
%dl, %eax -; CHECK-NEXT: vpextrb $15, %xmm1, %ecx -; CHECK-NEXT: vpextrb $15, %xmm0, %edx -; CHECK-NEXT: shrb %cl, %dl -; CHECK-NEXT: vpinsrb $14, %eax, %xmm3, %xmm0 -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; CHECK: vpsllw $5, %ymm1, %ymm1 +; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: retq %lshr = lshr <32 x i8> %r, %a ret <32 x i8> %lshr diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 9387192f8aa4..a06cadaa3f5a 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -176,13 +176,6 @@ define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { } declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone -define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 - %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone - define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] @@ -510,30 +503,6 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double> } declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) -define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { - ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1] - %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, - <8 x i64>zeroinitializer, i8 -1) - ret <8 x i64> %res -} -declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) - -define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { - ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1] - %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, - <16 x i32>zeroinitializer, i16 -1) - ret <16 x i32> %res -} -declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) - -define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { - ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1] - %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, - <16 x i32>zeroinitializer, i16 -1) - ret <16 x i32> %res -} -declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) - define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1] %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) @@ -630,28 +599,6 @@ define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask ret <8 x double> %res } -define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, 
<16 x i32>%perm) { -; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1] - %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1) - ret <16 x float> %res -} - -define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) { -; CHECK-LABEL: test_vpermt2ps_mask: -; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1] - %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask) - ret <16 x float> %res -} - -declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) - -define <8 x i64> @test_vmovntdqa(i8 *%x) { -; CHECK-LABEL: test_vmovntdqa: -; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07] - %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x) - ret <8 x i64> %res -} - declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { @@ -2807,3 +2754,262 @@ define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) { %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) ret <2 x double> %res } + +define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2sd32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone + +define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2ss32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone + +define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone + +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 
x float> %a, i32* %ptr) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %b = load i32, i32* %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr) +; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %b = load i32, i32* %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone + +define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) +; CHECK-LABEL: _mm_cvt_roundu64_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) +; CHECK-LABEL: _mm_cvtu64_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) +; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} + +define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone + +define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { + ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1] + %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, + <8 x i64>zeroinitializer, i8 -1) + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x 
i64>, <8 x i64>, <8 x i64>, i8) + +define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1] + %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, + <16 x i32>zeroinitializer, i16 -1) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1] + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, + <16 x i32>zeroinitializer, i16 -1) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512 +; CHECK-NOT: call +; CHECK: vpmaxsd %zmm +; CHECK: {%k1} +define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512 +; CHECK-NOT: call +; CHECK: vpmaxsq %zmm +; CHECK: {%k1} +define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { + %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512 +; CHECK-NOT: call +; CHECK: vpmaxud %zmm +; CHECK: {%k1} +define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512 +; CHECK-NOT: call +; CHECK: vpmaxuq %zmm +; CHECK: {%k1} +define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { + %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512 +; CHECK-NOT: call +; CHECK: vpminsd %zmm +; CHECK: {%k1} +define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { + %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> 
@llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512 +; CHECK-NOT: call +; CHECK: vpminsq %zmm +; CHECK: {%k1} +define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { + %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512 +; CHECK-NOT: call +; CHECK: vpminud %zmm +; CHECK: {%k1} +define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { + %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512 +; CHECK-NOT: call +; CHECK: vpminuq %zmm +; CHECK: {%k1} +define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { + %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll index 2683d6fe238c..7e9eda58737d 100644 --- a/test/CodeGen/X86/avx512-shuffle.ll +++ b/test/CodeGen/X86/avx512-shuffle.ll @@ -116,10 +116,10 @@ define <16 x i32> @test15(<16 x i32> %a) { ret <16 x i32> %b } ; CHECK-LABEL: test16 -; CHECK: valignq $2, %zmm0, %zmm1 +; CHECK: valignq $3, %zmm0, %zmm1 ; CHECK: ret define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind { - %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> ret <8 x double> %c } @@ -252,6 +252,62 @@ define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind { ret <8 x double> %c } +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind { +; CHECK-LABEL: test_vshuff64x2_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5> + ret <8 x double> %res +} + +define <8 x double> @test_vshuff64x2_512_mask(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind { +; CHECK-LABEL: test_vshuff64x2_512_mask: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1 +; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 +; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: 
retq + %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5> + %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer + ret <8 x double> %res +} + +define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind { +; CHECK-LABEL: test_vshufi64x2_512_mask: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1 +; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 +; CHECK-NEXT: vshufi64x2 $168, %zmm0, %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq + %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 4, i32 5> + %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x + ret <8 x i64> %res +} + +define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind { +; CHECK-LABEL: test_vshuff64x2_512_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: vshuff64x2 $40, %zmm0, %zmm0, %zmm0 +; CHECK-NEXT: retq + %x1 = load <8 x double>,<8 x double> *%ptr,align 1 + %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 0, i32 1> + ret <8 x double> %res +} + +define <16 x float> @test_vshuff32x4_512_mem(<16 x float> %x, <16 x float> *%ptr) nounwind { +; CHECK-LABEL: test_vshuff32x4_512_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: vshuff64x2 $20, %zmm0, %zmm0, %zmm0 +; CHECK-NEXT: retq + %x1 = load <16 x float>,<16 x float> *%ptr,align 1 + %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> + ret <16 x float> %res +} + define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind { ; CHECK-LABEL: test_align_v16i32_rr: ; CHECK: ## BB#0: diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 04028a1da510..6a4a3aa7e371 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -394,7 +394,7 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y ; KNL-LABEL: test28 ; KNL: vpcmpgtq ; KNL: vpcmpgtq -; KNL: kxorw +; KNL: kxnorw define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) { %x_gt_y = icmp sgt <8 x i64> %x, %y %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1 @@ -406,7 +406,7 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1 ; KNL-LABEL: test29 ; KNL: vpcmpgtd ; KNL: vpcmpgtd -; KNL: kxnorw +; KNL: kxorw define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) { %x_gt_y = icmp sgt <16 x i32> %x, %y %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1 diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 0db2941cac6f..9ee0e09d1b7a 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -788,3 +788,133 @@ define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr } declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512 +; CHECK-NOT: call +; CHECK: vpmaxsb %zmm +; CHECK: {%k1} +define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { + %res = call <64 x i8> 
@llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512 +; CHECK-NOT: call +; CHECK: vpmaxsw %zmm +; CHECK: {%k1} +define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512 +; CHECK-NOT: call +; CHECK: vpmaxub %zmm +; CHECK: {%k1} +define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { + %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512 +; CHECK-NOT: call +; CHECK: vpmaxuw %zmm +; CHECK: {%k1} +define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512 +; CHECK-NOT: call +; CHECK: vpminsb %zmm +; CHECK: {%k1} +define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { + %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512 +; CHECK-NOT: call +; CHECK: vpminsw %zmm +; CHECK: {%k1} +define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { + %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512 +; CHECK-NOT: call +; CHECK: vpminub %zmm +; CHECK: {%k1} +define <64 x 
i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
+; CHECK-NOT: call
+; CHECK: vpminuw %zmm
+; CHECK: {%k1}
+define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
+; CHECK-NOT: call
+; CHECK: vpavgb %zmm
+; CHECK: {%k1}
+define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
+; CHECK-NOT: call
+; CHECK: vpavgw %zmm
+; CHECK: {%k1}
+define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index f0efb2c947e9..cf8c32a48b6b 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2667,4 +2667,264 @@ define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b,
 ret <32 x i8> %res
 }
-declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
\ No newline at end of file
+declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_128
+; CHECK-NOT: call
+; CHECK: vpmaxsb %xmm
+; CHECK: {%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_256
+; CHECK-NOT: call
+; CHECK: vpmaxsb %ymm
+; CHECK: {%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_128
+; CHECK-NOT: call
+; CHECK: vpmaxsw %xmm
+; CHECK: {%k1}
+define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_256
+; CHECK-NOT: call
+; CHECK: vpmaxsw %ymm
+; CHECK: {%k1}
+define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_128
+; CHECK-NOT: call
+; CHECK: vpmaxub %xmm
+; CHECK: {%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_256
+; CHECK-NOT: call
+; CHECK: vpmaxub %ymm
+; CHECK: {%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_128
+; CHECK-NOT: call
+; CHECK: vpmaxuw %xmm
+; CHECK: {%k1}
+define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_256
+; CHECK-NOT: call
+; CHECK: vpmaxuw %ymm
+; CHECK: {%k1}
+define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_128
+; CHECK-NOT: call
+; CHECK: vpminsb %xmm
+; CHECK: {%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_256
+; CHECK-NOT: call
+; CHECK: vpminsb %ymm
+; CHECK: {%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_128
+; CHECK-NOT: call
+; CHECK: vpminsw %xmm
+; CHECK: {%k1}
+define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_256
+; CHECK-NOT: call
+; CHECK: vpminsw %ymm
+; CHECK: {%k1}
+define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_128
+; CHECK-NOT: call
+; CHECK: vpminub %xmm
+; CHECK: {%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_256
+; CHECK-NOT: call
+; CHECK: vpminub %ymm
+; CHECK: {%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_128
+; CHECK-NOT: call
+; CHECK: vpminuw %xmm
+; CHECK: {%k1}
+define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_256
+; CHECK-NOT: call
+; CHECK: vpminuw %ymm
+; CHECK: {%k1}
+define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128
+; CHECK-NOT: call
+; CHECK: vpavgb %xmm
+; CHECK: {%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_256
+; CHECK-NOT: call
+; CHECK: vpavgb %ymm
+; CHECK: {%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_128
+; CHECK-NOT: call
+; CHECK: vpavgw %xmm
+; CHECK: {%k1}
+define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_256
+; CHECK-NOT: call
+; CHECK: vpavgw %ymm
+; CHECK: {%k1}
+define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index 9d96c272f355..dfd4986b85c1 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2586,4 +2586,212 @@ define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
 %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
 ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
\ No newline at end of file
+declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128
+; CHECK-NOT: call
+; CHECK: vpmaxsd %xmm
+; CHECK: {%k1}
+define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256
+; CHECK-NOT: call
+; CHECK: vpmaxsd %ymm
+; CHECK: {%k1}
+define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128
+; CHECK-NOT: call
+; CHECK: vpmaxsq %xmm
+; CHECK: {%k1}
+define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256
+; CHECK-NOT: call
+; CHECK: vpmaxsq %ymm
+; CHECK: {%k1}
+define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128
+; CHECK-NOT: call
+; CHECK: vpmaxud %xmm
+; CHECK: {%k1}
+define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) {
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256
+; CHECK-NOT: call
+; CHECK: vpmaxud %ymm
+; CHECK: {%k1}
+define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128
+; CHECK-NOT: call
+; CHECK: vpmaxuq %xmm
+; CHECK: {%k1}
+define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256
+; CHECK-NOT: call
+; CHECK: vpmaxuq %ymm
+; CHECK: {%k1}
+define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128
+; CHECK-NOT: call
+; CHECK: vpminsd %xmm
+; CHECK: {%k1}
+define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256
+; CHECK-NOT: call
+; CHECK: vpminsd %ymm
+; CHECK: {%k1}
+define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128
+; CHECK-NOT: call
+; CHECK: vpminsq %xmm
+; CHECK: {%k1}
+define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256
+; CHECK-NOT: call
+; CHECK: vpminsq %ymm
+; CHECK: {%k1}
+define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128
+; CHECK-NOT: call
+; CHECK: vpminud %xmm
+; CHECK: {%k1}
+define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256
+; CHECK-NOT: call
+; CHECK: vpminud %ymm
+; CHECK: {%k1}
+define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128
+; CHECK-NOT: call
+; CHECK: vpminuq %xmm
+; CHECK: {%k1}
+define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256
+; CHECK-NOT: call
+; CHECK: vpminuq %ymm
+; CHECK: {%k1}
+define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index e0276e42d4d2..89defa956a45 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -546,7 +546,7 @@ exit:
 
 declare i32 @__gxx_personality_v0(...)
 
-define void @test_eh_lpad_successor() {
+define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; Some times the landing pad ends up as the first successor of an invoke block.
 ; When this happens, a strange result used to fall out of updateTerminators: we
 ; didn't correctly locate the fallthrough successor, assuming blindly that the
@@ -564,7 +564,7 @@ preheader:
 br label %loop
 
 lpad:
- %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %lpad.val = landingpad { i8*, i32 }
 cleanup
 resume { i8*, i32 } %lpad.val
 
@@ -574,7 +574,7 @@ loop:
 
 declare void @fake_throw() noreturn
 
-define void @test_eh_throw() {
+define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; For blocks containing a 'throw' (or similar functionality), we have
 ; a no-return invoke. In this case, only EH successors will exist, and
 ; fallthrough simply won't occur. Make sure we don't crash trying to update
@@ -591,7 +591,7 @@ continue:
 unreachable
 
 cleanup:
- %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %0 = landingpad { i8*, i32 }
 cleanup
 unreachable
 }
diff --git a/test/CodeGen/X86/branchfolding-landingpads.ll b/test/CodeGen/X86/branchfolding-landingpads.ll
index 40ec92ea0d7f..032b98812452 100644
--- a/test/CodeGen/X86/branchfolding-landingpads.ll
+++ b/test/CodeGen/X86/branchfolding-landingpads.ll
@@ -18,20 +18,20 @@ declare void @_throw()
 
 ; CHECK-LABEL: @main
 ; CHECK: %unreachable
 
-define i32 @main(i8* %cleanup) {
+define i32 @main(i8* %cleanup) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
 invoke void @_throw() #0
 to label %unreachable unwind label %catch.dispatch9
 
 catch.dispatch9: ; preds = %entry
- %tmp13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp13 = landingpad { i8*, i32 }
 cleanup
 catch i8* null
 invoke void @_throw() #0
 to label %unreachable unwind label %lpad31
 
 lpad31: ; preds = %catch.dispatch9
- %tmp20 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp20 = landingpad { i8*, i32 }
 cleanup
 catch i8* null
 call void @foo()
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
index 7d5f380c1e28..5376601a95e3 100644
--- a/test/CodeGen/X86/bswap-vector.ll
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-NOSSSE3
-; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-SSSE3
-; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-AVX2
+; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-NOSSSE3
+; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
+; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-AVX --check-prefix=CHECK-AVX2
 ; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE-AVX2
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -285,3 +285,174 @@ entry:
 %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
 ret <4 x i16> %r
 }
+
+;
+; Double BSWAP -> Identity
+;
+
+define <8 x i16> @identity_v8i16(<8 x i16> %v) {
+; CHECK-ALL-LABEL: identity_v8i16:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL: retq
+entry:
+ %bs1 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
+ %bs2 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %bs1)
+ ret <8 x i16> %bs2
+}
+
+define <4 x i32> @identity_v4i32(<4 x i32> %v) {
+; CHECK-ALL-LABEL: identity_v4i32:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
+ %bs2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %bs1)
+ ret <4 x i32> %bs2
+}
+
+define <2 x i64> @identity_v2i64(<2 x i64> %v) {
+; CHECK-ALL-LABEL: identity_v2i64:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+ %bs2 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %bs1)
+ ret <2 x i64> %bs2
+}
+
+define <16 x i16> @identity_v16i16(<16 x i16> %v) {
+; CHECK-ALL-LABEL: identity_v16i16:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
+ %bs2 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %bs1)
+ ret <16 x i16> %bs2
+}
+
+define <8 x i32> @identity_v8i32(<8 x i32> %v) {
+; CHECK-ALL-LABEL: identity_v8i32:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
+ %bs2 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %bs1)
+ ret <8 x i32> %bs2
+}
+
+define <4 x i64> @identity_v4i64(<4 x i64> %v) {
+; CHECK-ALL-LABEL: identity_v4i64:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
+ %bs2 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %bs1)
+ ret <4 x i64> %bs2
+}
+
+define <4 x i16> @identity_v4i16(<4 x i16> %v) {
+; CHECK-ALL-LABEL: identity_v4i16:
+; CHECK-ALL: # BB#0: # %entry
+; CHECK-ALL-NEXT: retq
+entry:
+ %bs1 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
+ %bs2 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %bs1)
+ ret <4 x i16> %bs2
+}
+
+;
+; Constant Folding
+;
+
+define <8 x i16> @fold_v8i16() {
+; CHECK-SSE-LABEL: fold_v8i16:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v8i16:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6>)
+ ret <8 x i16> %r
+}
+
+define <4 x i32> @fold_v4i32() {
+; CHECK-SSE-LABEL: fold_v4i32:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v4i32:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> <i32 0, i32 -1, i32 2, i32 -3>)
+ ret <4 x i32> %r
+}
+
+define <2 x i64> @fold_v2i64() {
+; CHECK-SSE-LABEL: fold_v2i64:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v2i64:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> <i64 255, i64 -1>)
+ ret <2 x i64> %r
+}
+
+define <16 x i16> @fold_v16i16() {
+; CHECK-SSE-LABEL: fold_v16i16:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm1 = [63999,2048,63487,2560,62975,3072,62463,3584]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v16i16:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,256,65535,512,65023,1024,64511,1536,63999,2048,63487,2560,62975,3072,62463,3584]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6, i16 -7, i16 8, i16 -9, i16 10, i16 -11, i16 12, i16 -13, i16 14>)
+ ret <16 x i16> %r
+}
+
+define <8 x i32> @fold_v8i32() {
+; CHECK-SSE-LABEL: fold_v8i32:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [0,16777216,4294967295,33554432]
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm1 = [4261412863,67108864,4227858431,100663296]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v8i32:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,16777216,4294967295,33554432,4261412863,67108864,4227858431,100663296]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> <i32 0, i32 1, i32 -1, i32 2, i32 -3, i32 4, i32 -5, i32 6>)
+ ret <8 x i32> %r
+}
+
+define <4 x i64> @fold_v4i64() {
+; CHECK-SSE-LABEL: fold_v4i64:
+; CHECK-SSE: # BB#0: # %entry
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm1 = [18446462598732840960,72056494526300160]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: fold_v4i64:
+; CHECK-AVX: # BB#0: # %entry
+; CHECK-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18374686479671623680,18446744073709551615,18446462598732840960,72056494526300160]
+; CHECK-AVX-NEXT: retq
+entry:
+ %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> <i64 255, i64 -1, i64 65535, i64 16776960>)
+ ret <4 x i64> %r
+}
diff --git a/test/CodeGen/X86/catch.ll b/test/CodeGen/X86/catch.ll
index 64e92783ac98..be7466e8abbb 100644
--- a/test/CodeGen/X86/catch.ll
+++ b/test/CodeGen/X86/catch.ll
@@ -7,13 +7,13 @@
 ; CHECK-NEXT: .quad .Lstr
 @str = private unnamed_addr constant [12 x i8] c"NSException\00"
 
-define void @f() {
+define void @f() personality i8* bitcast (void ()* @h to i8*) {
 invoke void @g()
 to label %invoke.cont unwind label %lpad
 invoke.cont:
 ret void
 lpad:
- %tmp14 = landingpad { i8*, i32 } personality i8* bitcast (void ()* @h to i8*)
+ %tmp14 = landingpad { i8*, i32 }
 catch i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i64 0, i64 0)
 ret void
 }
diff --git a/test/CodeGen/X86/cfi.ll b/test/CodeGen/X86/cfi.ll
index b57ff45f51e3..d5a3a8a26a3f 100644
--- a/test/CodeGen/X86/cfi.ll
+++ b/test/CodeGen/X86/cfi.ll
@@ -8,7 +8,7 @@
 ; PIC: .cfi_lsda 27, .Lexception0
 
 
-define void @bar() {
+define void @bar() personality i32 (...)* @__gxx_personality_v0 {
 entry:
 %call = invoke i32 @foo()
 to label %invoke.cont unwind label %lpad
 
@@ -17,7 +17,7 @@ invoke.cont:
 ret void
 
 lpad:
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 catch i8* null
 ret void
 }
diff --git a/test/CodeGen/X86/code_placement_eh.ll b/test/CodeGen/X86/code_placement_eh.ll
index 2da3f9f53ef8..62fddffffc47 100644
--- a/test/CodeGen/X86/code_placement_eh.ll
+++ b/test/CodeGen/X86/code_placement_eh.ll
@@ -6,7 +6,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0"
 
-define void @foo() {
+define void @foo() personality i32 (...)* @__gxx_personality_v0 {
 invcont5:
 br label %bb15
 
@@ -22,12 +22,12 @@ bb18.i5.i: ; preds = %.noexc6.i.i, %bb51.
 to label %.noexc6.i.i unwind label %lpad.i.i ; <float> [#uses=0]
 
 lpad.i.i: ; preds = %bb18.i5.i, %.noexc6.i.i
- %lpadval.i.i = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %lpadval.i.i = landingpad { i8*, i32 }
 catch i8* null
 unreachable
 
 lpad59.i: ; preds = %bb15
- %lpadval60.i.i = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %lpadval60.i.i = landingpad { i8*, i32 }
 catch i8* null
 unreachable
 
diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll
index 65502b312b04..c5c761ee63ef 100644
--- a/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -30,7 +30,7 @@ false:
 }
 
 ; Check that we manage to form a zextload is an operation with only one
-; argument to explicitly extend is in the the way.
+; argument to explicitly extend is in the way.
 ; OPTALL-LABEL: @promoteOneArg
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
@@ -55,7 +55,7 @@ false:
 }
 
 ; Check that we manage to form a sextload is an operation with only one
-; argument to explicitly extend is in the the way.
+; argument to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteOneArgSExt
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
@@ -80,7 +80,7 @@ false:
 }
 
 ; Check that we manage to form a zextload is an operation with two
-; arguments to explicitly extend is in the the way.
+; arguments to explicitly extend is in the way.
 ; Extending %add will create two extensions:
 ; 1. One for %b.
 ; 2. One for %t.
@@ -119,7 +119,7 @@ false:
 }
 
 ; Check that we manage to form a sextload is an operation with two
-; arguments to explicitly extend is in the the way.
+; arguments to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteTwoArgSExt
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
diff --git a/test/CodeGen/X86/disable-tail-calls.ll b/test/CodeGen/X86/disable-tail-calls.ll
new file mode 100644
index 000000000000..80e8fd74e92d
--- /dev/null
+++ b/test/CodeGen/X86/disable-tail-calls.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march x86-64 | FileCheck %s --check-prefix=NO-OPTION
+; RUN: llc < %s -march x86-64 -disable-tail-calls | FileCheck %s --check-prefix=DISABLE-TRUE
+; RUN: llc < %s -march x86-64 -disable-tail-calls=false | FileCheck %s --check-prefix=DISABLE-FALSE
+
+; Check that command line option "-disable-tail-calls" overrides function
+; attribute "disable-tail-calls".
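The precedence exercised by this new test is "command line beats attribute": with no flag, the "disable-tail-calls"="true" attribute alone turns the tail call back into a plain callq, while -disable-tail-calls and -disable-tail-calls=false override the attribute globally in either direction. A minimal sketch of just the attribute path, with hypothetical file and function names but the same RUN idiom as this test:

; RUN: llc < %s -march x86-64 | FileCheck %s
; The attribute alone suppresses the jmp-based tail call:
; CHECK: callq {{\_?}}callee
define i32 @caller(i32 %a) #0 {
entry:
 %r = tail call i32 @callee(i32 %a)
 ret i32 %r
}
declare i32 @callee(i32)
attributes #0 = { "disable-tail-calls"="true" }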
+
+; NO-OPTION-LABEL: {{\_?}}func_attr
+; NO-OPTION: callq {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_attr
+; DISABLE-FALSE: jmp {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_attr
+; DISABLE-TRUE: callq {{\_?}}callee
+
+define i32 @func_attr(i32 %a) #0 {
+entry:
+ %call = tail call i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+; NO-OPTION-LABEL: {{\_?}}func_noattr
+; NO-OPTION: jmp {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_noattr
+; DISABLE-FALSE: jmp {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_noattr
+; DISABLE-TRUE: callq {{\_?}}callee
+
+define i32 @func_noattr(i32 %a) {
+entry:
+ %call = tail call i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+declare i32 @callee(i32)
+
+attributes #0 = { "disable-tail-calls"="true" }
diff --git a/test/CodeGen/X86/dllimport.ll b/test/CodeGen/X86/dllimport.ll
index 9db654f22712..34faaeb6fed7 100644
--- a/test/CodeGen/X86/dllimport.ll
+++ b/test/CodeGen/X86/dllimport.ll
@@ -57,3 +57,7 @@ define void @use() nounwind {
 
 ret void
 }
+
+; CHECK: _fp:
+; CHECK-NEXT: .long _fun
+@fp = constant void ()* @fun
diff --git a/test/CodeGen/X86/dwarf-eh-prepare.ll b/test/CodeGen/X86/dwarf-eh-prepare.ll
index 25572d868da0..9acfaeb193e7 100644
--- a/test/CodeGen/X86/dwarf-eh-prepare.ll
+++ b/test/CodeGen/X86/dwarf-eh-prepare.ll
@@ -9,7 +9,7 @@ declare void @might_throw()
 
 declare void @cleanup()
 
-define i32 @simple_cleanup_catch() {
+define i32 @simple_cleanup_catch() personality i32 (...)* @__gxx_personality_v0 {
 invoke void @might_throw()
 to label %cont unwind label %lpad
 
@@ -22,7 +22,7 @@ cont:
 ; CHECK: ret i32 0
 
 lpad:
- %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %ehvals = landingpad { i8*, i32 }
 cleanup
 catch i8* @int_typeinfo
 %ehptr = extractvalue { i8*, i32 } %ehvals, 0
@@ -33,7 +33,7 @@ lpad:
 br i1 %int_match, label %catch_int, label %eh.resume
 
 ; CHECK: lpad:
-; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: landingpad { i8*, i32 }
 ; CHECK: call void @cleanup()
 ; CHECK: call i32 @llvm.eh.typeid.for
 ; CHECK: br i1
@@ -54,7 +54,7 @@ eh.resume:
 }
 
 
-define i32 @catch_no_resume() {
+define i32 @catch_no_resume() personality i32 (...)* @__gxx_personality_v0 {
 invoke void @might_throw()
 to label %cont unwind label %lpad
 
@@ -62,7 +62,7 @@ cont:
 ret i32 0
 
 lpad:
- %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %ehvals = landingpad { i8*, i32 }
 catch i8* @int_typeinfo
 %ehptr = extractvalue { i8*, i32 } %ehvals, 0
 %ehsel = extractvalue { i8*, i32 } %ehvals, 1
@@ -81,18 +81,18 @@ eh.resume:
 
 ; Check that we can prune the unreachable resume instruction.
-; CHECK-LABEL: define i32 @catch_no_resume() {
+; CHECK-LABEL: define i32 @catch_no_resume() personality i32 (...)* @__gxx_personality_v0 {
 ; CHECK: invoke void @might_throw()
 ; CHECK: ret i32 0
 ; CHECK: lpad:
-; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: landingpad { i8*, i32 }
 ; CHECK-NOT: br i1
 ; CHECK: ret i32 1
 ; CHECK-NOT: call void @_Unwind_Resume
 ; CHECK: {{^[}]}}
 
-define i32 @catch_cleanup_merge() {
+define i32 @catch_cleanup_merge() personality i32 (...)* @__gxx_personality_v0 {
 invoke void @might_throw()
 to label %inner_invoke unwind label %outer_lpad
 inner_invoke:
 invoke void @might_throw()
 to label %cont unwind label %inner_lpad
@@ -102,12 +102,12 @@ cont:
 ret i32 0
 
 outer_lpad:
- %ehvals1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %ehvals1 = landingpad { i8*, i32 }
 catch i8* @int_typeinfo
 br label %catch.dispatch
 
 inner_lpad:
- %ehvals2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ %ehvals2 = landingpad { i8*, i32 }
 cleanup
 catch i8* @int_typeinfo
 call void @cleanup()
@@ -138,11 +138,11 @@ eh.resume:
 ; CHECK: ret i32 0
 ;
 ; CHECK: outer_lpad:
-; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: landingpad { i8*, i32 }
 ; CHECK: br label %catch.dispatch
 ;
 ; CHECK: inner_lpad:
-; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: landingpad { i8*, i32 }
 ; CHECK: call void @cleanup()
 ; CHECK: br label %catch.dispatch
 ;
diff --git a/test/CodeGen/X86/eh-label.ll b/test/CodeGen/X86/eh-label.ll
index aff0bcfffcfe..d349174f95b7 100644
--- a/test/CodeGen/X86/eh-label.ll
+++ b/test/CodeGen/X86/eh-label.ll
@@ -3,7 +3,7 @@
 
 declare void @g()
 
-define void @f() {
+define void @f() personality i8* bitcast (void ()* @g to i8*) {
 bb0:
 call void asm ".Lfunc_end0:", ""()
 ; CHECK: #APP
@@ -12,7 +12,7 @@ bb0:
 invoke void @g()
 to label %bb2 unwind label %bb1
 bb1:
- landingpad { i8*, i32 } personality i8* bitcast (void ()* @g to i8*)
+ landingpad { i8*, i32 }
 catch i8* null
 call void @g()
 ret void
diff --git a/test/CodeGen/X86/exception-label.ll b/test/CodeGen/X86/exception-label.ll
index cafa1e630b96..2270d2da1801 100644
--- a/test/CodeGen/X86/exception-label.ll
+++ b/test/CodeGen/X86/exception-label.ll
@@ -8,13 +8,13 @@
 
 declare void @g()
 
-define void @f() {
+define void @f() personality i8* bitcast (void ()* @g to i8*) {
 bb0:
 call void asm ".Lexception0:", ""()
 invoke void @g()
 to label %bb2 unwind label %bb1
 bb1:
- landingpad { i8*, i32 } personality i8* bitcast (void ()* @g to i8*)
+ landingpad { i8*, i32 }
 catch i8* null
 br label %bb2
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
index 684647ca9484..d7b64ed3a5b8 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -12,7 +12,7 @@
 
 declare void @bar()
 
-define void @foo(i32 %a, i32 %b) nounwind {
+define void @foo(i32 %a, i32 %b) nounwind personality i32 (...)* @__gxx_personality_v0 {
 entry:
 %q = add i32 %a, 7
 %r = add i32 %b, 9
@@ -26,7 +26,7 @@ true:
 return:
 ret void
 unw:
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 cleanup
 unreachable
 }
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 67b30292be3c..1886d3379aad 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -89,7 +89,7 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
 
 ; PR9500, rdar://9156159 - Don't do non-local address mode folding,
 ; because it may require values which wouldn't otherwise be live out
 ; of their blocks.
-define void @test6() {
+define void @test6() personality i32 (...)* @__gxx_personality_v0 {
 if.end: ; preds = %if.then, %invoke.cont
 %tmp15 = load i64, i64* undef
 %dec = add i64 %tmp15, 13
@@ -103,7 +103,7 @@ invoke.cont16: ; preds = %if.then14
 unreachable
 
 lpad: ; preds = %if.end19, %if.then14, %if.end, %entry
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 cleanup
 unreachable
 }
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 27af5738ca3e..4f503af716a8 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -114,3 +114,81 @@ define float @test11(float %a) {
 ret float %t2
 }
 
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
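The comment above is the heart of these tests: a left-leaning chain ((x0 + x1) + x2) + x3 is three dependent vaddss instructions deep, while the reassociated shape (x0 + x1) + (x2 + x3) is only two deep, so the first two adds can issue in parallel. A hand-written sketch of the reassociated IR (hypothetical function name; the fast-math relaxation the test runs under, not visible in this hunk, is spelled here as the fast flag):

define float @reassoc_sketch(float %x0, float %x1, float %x2, float %x3) {
 ; two independent adds feed one combining add: depth 2 instead of 3
 %a = fadd fast float %x0, %x1
 %b = fadd fast float %x2, %x3
 %r = fadd fast float %a, %b
 ret float %r
}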
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm7, %xmm6, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index b656dc9d68e2..82064c2a3907 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -mtriple i686-pc-windows-gnu %s -o - | FileCheck %s --check-prefix=MINGW32
 @_ZTIi = external constant i8*
 
-define i32 @main() uwtable optsize ssp {
+define i32 @main() uwtable optsize ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; APPLE: .cfi_startproc
 ; APPLE: .cfi_personality 155, ___gxx_personality_v0
 ; APPLE: .cfi_lsda 16, Lexception0
@@ -36,7 +36,7 @@ entry:
 to label %try.cont unwind label %lpad
 
 lpad:
- %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %0 = landingpad { i8*, i32 }
 cleanup
 catch i8* bitcast (i8** @_ZTIi to i8*)
 br label %eh.resume
diff --git a/test/CodeGen/X86/gcc_except_table_functions.ll b/test/CodeGen/X86/gcc_except_table_functions.ll
index 7a64a01fa38d..8e002ad142b8 100644
--- a/test/CodeGen/X86/gcc_except_table_functions.ll
+++ b/test/CodeGen/X86/gcc_except_table_functions.ll
@@ -10,7 +10,7 @@ declare void @filt1()
 declare void @_Z1fv()
 declare i32 @llvm.eh.typeid.for(i8*)
 
-define i32 @main() uwtable {
+define i32 @main() uwtable personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
 invoke void @_Z1fv()
 to label %try.cont unwind label %lpad
 
@@ -19,7 +19,7 @@ try.cont:
 ret i32 0
 
 lpad:
- %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %0 = landingpad { i8*, i32 }
 cleanup
 catch i8* bitcast (void ()* @filt0 to i8*)
 catch i8* bitcast (void ()* @filt1 to i8*)
diff --git a/test/CodeGen/X86/global-fill.ll b/test/CodeGen/X86/global-fill.ll
new file mode 100644
index 000000000000..656c8ca2c323
--- /dev/null
+++ b/test/CodeGen/X86/global-fill.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+@test1 = global [2 x i24] [i24 -1, i24 -1]
+; CHECK-LABEL: test1:
+; CHECK-NEXT: .long 16777215
+; CHECK-NEXT: .long 16777215
+
+@test2 = global [2 x i7] [i7 1, i7 1]
+; CHECK-LABEL: test2:
+; CHECK-NEXT: .space 2,1
+
+@test3 = global [4 x i128] [i128 -1, i128 -1, i128 -1, i128 -1]
+; CHECK-LABEL: test3:
+; CHECK-NEXT: .space 64,255
+
+@test4 = global [3 x i16] [i16 257, i16 257, i16 257]
+; CHECK-LABEL: test4:
+; CHECK-NEXT: .space 6,1
+
+@test5 = global [2 x [2 x i16]] [[2 x i16] [i16 257, i16 257], [2 x i16] [i16 -1, i16 -1]]
+; CHECK-LABEL: test5:
+; CHECK-NEXT: .space 4,1
+; CHECK-NEXT: .space 4,255
+
+@test6 = global [2 x [2 x i16]] [[2 x i16] [i16 257, i16 257], [2 x i16] [i16 257, i16 257]]
+; CHECK-LABEL: test6:
+; CHECK-NEXT: .space 8,1
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 8c61411e53eb..82547a606742 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -61,12 +61,12 @@ bb5:
 
 declare void @G()
 
-define void @F3(i32 %y) {
+define void @F3(i32 %y) personality i8* bitcast (void ()* @G to i8*) {
 bb0:
 invoke void @G()
 to label %bb2 unwind label %bb1
 bb1:
- landingpad { i8*, i32 } personality i8* bitcast (void ()* @G to i8*)
+ landingpad { i8*, i32 }
 catch i8* null
 br label %bb2
 bb2:
diff --git a/test/CodeGen/X86/implicit-null-check-negative.ll b/test/CodeGen/X86/implicit-null-check-negative.ll
new file mode 100644
index 000000000000..e0210d9315f1
--- /dev/null
+++ b/test/CodeGen/X86/implicit-null-check-negative.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -O3 -debug-only=faultmaps -enable-implicit-null-checks < %s | FileCheck %s
+; REQUIRES: asserts
+
+; List cases where we should *not* be emitting implicit null checks.
+
+; CHECK-NOT: Fault Map Output
+
+define i32 @imp_null_check_load(i32* %x, i32* %y) {
+ entry:
+  %c = icmp eq i32* %x, null
+; It isn't legal to move the load from %x from "not_null" to here --
+; the store to %y could be aliasing it.
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  store i32 0, i32* %y
+  %t = load i32, i32* %x
+  ret i32 %t
+}
+
+define i32 @imp_null_check_gep_load(i32* %x) {
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+; null + 5000 * sizeof(i32) lies outside the null page and hence the
+; load to %t cannot be assumed to be reliably faulting.
+  %x.gep = getelementptr i32, i32* %x, i32 5000
+  %t = load i32, i32* %x.gep
+  ret i32 %t
+}
+
+define i32 @imp_null_check_load_no_md(i32* %x) {
+; Everything is okay except that the !never.executed metadata is
+; missing.
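; A worked version of the page-size reasoning in the gep case above,
; assuming the usual 4 KiB unmapped page at address zero: a load at
; null + offset can stand in for the explicit null check only while the
; offset stays inside that page, and
;   5000 * sizeof(i32) = 5000 * 4 = 20000 bytes
; is far past 4096, so the gep load may land on mapped memory and never
; fault. The positive tests in implicit-null-check.ll below use an
; offset of 32 * 4 = 128 bytes, comfortably inside the page.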
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %t = load i32, i32* %x
+  ret i32 %t
+}
diff --git a/test/CodeGen/X86/implicit-null-check.ll b/test/CodeGen/X86/implicit-null-check.ll
new file mode 100644
index 000000000000..f4c539800fbb
--- /dev/null
+++ b/test/CodeGen/X86/implicit-null-check.ll
@@ -0,0 +1,118 @@
+; RUN: llc -O3 -mtriple=x86_64-apple-macosx -enable-implicit-null-checks < %s | FileCheck %s
+
+define i32 @imp_null_check_load(i32* %x) {
+; CHECK-LABEL: _imp_null_check_load:
+; CHECK: Ltmp1:
+; CHECK: movl (%rdi), %eax
+; CHECK: retq
+; CHECK: Ltmp0:
+; CHECK: movl $42, %eax
+; CHECK: retq
+
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %t = load i32, i32* %x
+  ret i32 %t
+}
+
+define i32 @imp_null_check_gep_load(i32* %x) {
+; CHECK-LABEL: _imp_null_check_gep_load:
+; CHECK: Ltmp3:
+; CHECK: movl 128(%rdi), %eax
+; CHECK: retq
+; CHECK: Ltmp2:
+; CHECK: movl $42, %eax
+; CHECK: retq
+
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %x.gep = getelementptr i32, i32* %x, i32 32
+  %t = load i32, i32* %x.gep
+  ret i32 %t
+}
+
+define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
+; CHECK-LABEL: _imp_null_check_add_result:
+; CHECK: Ltmp5:
+; CHECK: addl (%rdi), %esi
+; CHECK: movl %esi, %eax
+; CHECK: retq
+; CHECK: Ltmp4:
+; CHECK: movl $42, %eax
+; CHECK: retq
+
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %t = load i32, i32* %x
+  %p1 = add i32 %t, %p
+  ret i32 %p1
+}
+
+; CHECK-LABEL: __LLVM_FaultMaps:
+
+; Version:
+; CHECK-NEXT: .byte 1
+
+; Reserved x2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+
+; # functions:
+; CHECK-NEXT: .long 3
+
+; FunctionAddr:
+; CHECK-NEXT: .quad _imp_null_check_add_result
+; NumFaultingPCs
+; CHECK-NEXT: .long 1
+; Reserved:
+; CHECK-NEXT: .long 0
+; Fault[0].Type:
+; CHECK-NEXT: .long 1
+; Fault[0].FaultOffset:
+; CHECK-NEXT: .long Ltmp5-_imp_null_check_add_result
+; Fault[0].HandlerOffset:
+; CHECK-NEXT: .long Ltmp4-_imp_null_check_add_result
+
+; FunctionAddr:
+; CHECK-NEXT: .quad _imp_null_check_gep_load
+; NumFaultingPCs
+; CHECK-NEXT: .long 1
+; Reserved:
+; CHECK-NEXT: .long 0
+; Fault[0].Type:
+; CHECK-NEXT: .long 1
+; Fault[0].FaultOffset:
+; CHECK-NEXT: .long Ltmp3-_imp_null_check_gep_load
+; Fault[0].HandlerOffset:
+; CHECK-NEXT: .long Ltmp2-_imp_null_check_gep_load
+
+; FunctionAddr:
+; CHECK-NEXT: .quad _imp_null_check_load
+; NumFaultingPCs
+; CHECK-NEXT: .long 1
+; Reserved:
+; CHECK-NEXT: .long 0
+; Fault[0].Type:
+; CHECK-NEXT: .long 1
+; Fault[0].FaultOffset:
+; CHECK-NEXT: .long Ltmp1-_imp_null_check_load
+; Fault[0].HandlerOffset:
+; CHECK-NEXT: .long Ltmp0-_imp_null_check_load
diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll
index cf5cbe142ec7..9a184e563b19 100644
--- a/test/CodeGen/X86/inalloca-invoke.ll
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -11,7 +11,7 @@ declare void @begin(%Iter* sret)
 declare void @plus(%Iter* sret, %Iter*, i32)
 declare void @reverse(%frame.reverse* inalloca align 4)
 
-define i32 @main() {
+define i32 @main() personality i32 (...)* @pers {
 %temp.lvalue = alloca %Iter
 br label %blah
 
@@ -49,7 +49,7 @@ invoke.cont5: ; preds = %invoke.cont
 ret i32 0
 
 lpad: ; preds = %invoke.cont, %entry
- %lp = landingpad { i8*, i32 } personality i32 (...)* @pers
+ %lp = landingpad { i8*, i32 }
 cleanup
 unreachable
 }
diff --git a/test/CodeGen/X86/indirect-hidden.ll b/test/CodeGen/X86/indirect-hidden.ll
index 309375d93024..9e1b7d373554 100644
--- a/test/CodeGen/X86/indirect-hidden.ll
+++ b/test/CodeGen/X86/indirect-hidden.ll
@@ -8,10 +8,10 @@
 
 declare void @throws()
 
-define void @get_indirect_hidden() {
+define void @get_indirect_hidden() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 invoke void @throws() to label %end unwind label %lpad
 lpad:
- %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp = landingpad { i8*, i32 }
 catch i8* bitcast (i8** @hidden_typeid to i8*)
 br label %end
 
@@ -19,10 +19,10 @@ end:
 ret void
 }
 
-define void @get_indirect() {
+define void @get_indirect() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 invoke void @throws() to label %end unwind label %lpad
 lpad:
- %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp = landingpad { i8*, i32 }
 catch i8* bitcast (i8** @normal_typeid to i8*)
 br label %end
diff --git a/test/CodeGen/X86/large-gep-chain.ll b/test/CodeGen/X86/large-gep-chain.ll
index 44247b8658a7..8df282983f56 100644
--- a/test/CodeGen/X86/large-gep-chain.ll
+++ b/test/CodeGen/X86/large-gep-chain.ll
@@ -13,7 +13,7 @@
 @7 = external unnamed_addr constant [27 x i8], align 1
 @8 = external unnamed_addr constant [63 x i8], align 1
 
-define void @main() uwtable ssp {
+define void @main() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 bb:
 br i1 undef, label %bb1, label %bb2
 
@@ -25313,7 +25313,7 @@ bb25275: ; preds = %bb25274
 br label %bb25272
 
 bb25276: ; preds = %bb25283, %bb25274, %bb25273
- %tmp25277 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp25277 = landingpad { i8*, i32 }
 cleanup
 br label %bb25361
 
@@ -25383,7 +25383,7 @@ bb25297: ; preds = %bb25296
 br label %bb25300
 
 bb25298: ; preds = %bb25296, %bb25295, %bb25290, %bb25287
- %tmp25299 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp25299 = landingpad { i8*, i32 }
 cleanup
 br label %bb25360
 
@@ -25461,7 +25461,7 @@ bb25323: ; preds = %bb25319
 to label %bb25326 unwind label %bb25324
 
 bb25324: ; preds = %bb25357, %bb25344, %bb25343, %bb25342, %bb25337, %bb25334, %bb25333, %bb25323, %bb25313, %bb25307, %bb25306
- %tmp25325 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp25325 = landingpad { i8*, i32 }
 cleanup
 br label %bb25359
 
@@ -25562,7 +25562,7 @@ bb25354: ; preds = %bb25353
 br label %bb25358
 
 bb25355: ; preds = %bb25353, %bb25352, %bb25351
- %tmp25356 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %tmp25356 = landingpad { i8*, i32 }
 cleanup
 br label %bb25359
 
diff --git a/test/CodeGen/X86/patchpoint-invoke.ll b/test/CodeGen/X86/patchpoint-invoke.ll
index 98e9eb3b6a44..b7f198d960a6 100644
--- a/test/CodeGen/X86/patchpoint-invoke.ll
+++ b/test/CodeGen/X86/patchpoint-invoke.ll
@@ -2,7 +2,7 @@
 ; Test invoking of patchpoints
 ;
 
-define i64 @patchpoint_invoke(i64 %p1, i64 %p2) {
+define i64 @patchpoint_invoke(i64 %p1, i64 %p2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
 ; CHECK-LABEL: patchpoint_invoke:
 ; CHECK-NEXT: [[FUNC_BEGIN:.L.*]]:
@@ -25,7 +25,7 @@ success:
 ret i64 %result
 
 threw:
- %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ %0 = landingpad { i8*, i32 }
 catch i8* null
 ret i64 0
 }
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
index 424a30734f00..53162ebc8688 100644
--- a/test/CodeGen/X86/personality.ll
+++ b/test/CodeGen/X86/personality.ll
@@ -2,13 +2,13 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
 ; PR1632
 
-define void @_Z1fv() {
+define void @_Z1fv() personality i32 (...)* @__gxx_personality_v0 {
 entry:
 invoke void @_Z1gv()
 to label %return unwind label %unwind
 
 unwind: ; preds = %entry
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 cleanup
 br i1 false, label %eh_then, label %cleanup20
 
@@ -17,7 +17,7 @@ eh_then: ; preds = %unwind
 to label %return unwind label %unwind10
 
 unwind10: ; preds = %eh_then
- %exn10 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn10 = landingpad {i8*, i32}
 cleanup
 %upgraded.eh_select13 = extractvalue { i8*, i32 } %exn10, 1
 %upgraded.eh_select131 = sext i32 %upgraded.eh_select13 to i64
@@ -41,8 +41,10 @@ declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
 
+; X64-NOT: .quad ___gxx_personality_v0
 ; X64: .cfi_personality 155, ___gxx_personality_v0
 
+; X32-NOT: .long ___gxx_personality_v0
 ; X32: .cfi_personality 155, L___gxx_personality_v0$non_lazy_ptr
 
 ; X32: .section __IMPORT,__pointers,non_lazy_symbol_pointers
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
index 79d131b82b2e..41f1ac8cad64 100644
--- a/test/CodeGen/X86/personality_size.ll
+++ b/test/CodeGen/X86/personality_size.ll
@@ -2,13 +2,13 @@
 ; RUN: llc < %s -relocation-model=pic -mtriple=i386-pc-solaris2.11 | FileCheck %s -check-prefix=X32
 ; PR1632
 
-define void @_Z1fv() {
+define void @_Z1fv() personality i32 (...)* @__gxx_personality_v0 {
 entry:
 invoke void @_Z1gv()
 to label %return unwind label %unwind
 
 unwind: ; preds = %entry
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 cleanup
 ret void
 
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 21463b8539dc..dbe5bd646c7f 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
 
 define <16 x i8> @mul8c(<16 x i8> %i) nounwind {
 ; SSE2-LABEL: mul8c:
@@ -75,10 +76,6 @@ define <2 x i64> @b(<2 x i64> %i) nounwind {
 ; ALL-NEXT: movdqa {{.*#+}} xmm1 = [117,117]
 ; ALL-NEXT: movdqa %xmm0, %xmm2
 ; ALL-NEXT: pmuludq %xmm1, %xmm2
-; ALL-NEXT: pxor %xmm3, %xmm3
-; ALL-NEXT: pmuludq %xmm0, %xmm3
-; ALL-NEXT: psllq $32, %xmm3
-; ALL-NEXT: paddq %xmm3, %xmm2
 ; ALL-NEXT: psrlq $32, %xmm0
 ; ALL-NEXT: pmuludq %xmm1, %xmm0
 ; ALL-NEXT: psllq $32, %xmm0
@@ -248,3 +245,35 @@ entry:
 %A = mul <2 x i64> %i, %j
 ret <2 x i64> %A
 }
+
+define <4 x i64> @b1(<4 x i64> %i) nounwind {
+; AVX2-LABEL: @b1
+; AVX2: vpbroadcastq
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: retq
+entry:
+ %A = mul <4 x i64> %i, < i64 117, i64 117, i64 117, i64 117 >
+ ret <4 x i64> %A
+}
+
+define <4 x i64> @b2(<4 x i64> %i, <4 x i64> %j) nounwind {
+; AVX2-LABEL: @b2
+; AVX2: vpmuludq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: retq
+entry:
+ %A = mul <4 x i64> %i, %j
+ ret <4 x i64> %A
+}
+
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 867f2828d4d9..9e048d59d4ee 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -5,7 +5,7 @@ target triple = "i386-pc-linux-gnu"
 @.str = external constant [13 x i8] ; <[13 x i8]*> [#uses=1]
 
-define void @_ada_c34018a() {
+define void @_ada_c34018a() personality i32 (...)* @__gxx_personality_v0 {
 entry:
 %0 = tail call i32 @report__ident_int(i32 90) ; <i32> [#uses=1]
 %1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
@@ -22,7 +22,7 @@ return: ; preds = %lpad
 ret void
 
 lpad: ; preds = %entry
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn = landingpad {i8*, i32}
 cleanup
 %2 = icmp eq i8 %1, 90 ; <i1> [#uses=1]
 br i1 %2, label %return, label %bb22
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index e224c0858aff..9cbb462e47da 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -51,7 +51,7 @@ declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vecto
 
 declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
 
-define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) {
+define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) personality i32 (...)* @__gxx_personality_v0 {
 entry:
 invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
 to label %invcont.i unwind label %lpad.i
 
@@ -149,7 +149,7 @@ bb71.i: ; preds = %bb.i.i.i262.i, %bb66.i
 to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i unwind label %lpad.i.i.i.i.i.i ; <i8*> [#uses=0]
 
 lpad.i.i.i.i.i.i: ; preds = %bb71.i
- %exn.i.i.i.i.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i.i.i.i.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
@@ -164,7 +164,7 @@ _ZNSt6vectorIjSaIjEED1Ev.exit.i.i: ; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRK
 to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i unwind label %lpad.i.i.i.i8.i.i ; <i8*> [#uses=0]
 
 lpad.i.i.i.i8.i.i: ; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i
- %exn.i.i.i.i8.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i.i.i8.i.i = landingpad {i8*, i32}
 cleanup
 invoke void @_Unwind_Resume(i8* undef)
 to label %.noexc.i9.i.i unwind label %lpad.i19.i.i
 
@@ -183,7 +183,7 @@ bb50.i.i.i: ; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2Em
 to label %bb83.i unwind label %lpad188.i
 
 lpad.i19.i.i: ; preds = %lpad.i.i.i.i8.i.i
- %exn.i19.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i19.i.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
@@ -198,7 +198,7 @@ invcont84.i: ; preds = %bb83.i
 to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i unwind label %lpad.i.i.i.i315.i ; <i8*> [#uses=0]
 
 lpad.i.i.i.i315.i: ; preds = %invcont84.i
- %exn.i.i.i.i315.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i.i.i315.i = landingpad {i8*, i32}
 cleanup
 invoke void @_Unwind_Resume(i8* undef)
 to label %.noexc.i316.i unwind label %lpad.i352.i
 
@@ -217,7 +217,7 @@ bb50.i.i: ; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmR
 to label %invcont86.i unwind label %lpad200.i
 
 lpad.i352.i: ; preds = %lpad.i.i.i.i315.i
- %exn.i352.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i352.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
@@ -242,7 +242,7 @@ invcont101.i: ; preds = %bb100.i
 to label %_ZN10FullMatrixIdEC1Ejj.exit.i.i unwind label %lpad.i.i.i.i.i
 
 lpad.i.i.i.i.i: ; preds = %invcont101.i
- %exn.i.i.i.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i.i.i.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
@@ -251,7 +251,7 @@ _ZN10FullMatrixIdEC1Ejj.exit.i.i: ; preds = %invcont101.i
 to label %_ZN10FullMatrixIdEC1Ejj.exit28.i.i unwind label %lpad.i.i.i27.i.i
 
 lpad.i.i.i27.i.i: ; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i
- %exn.i.i.i27.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i.i27.i.i = landingpad {i8*, i32}
 cleanup
 invoke void @_Unwind_Resume(i8* undef)
 to label %.noexc.i.i unwind label %lpad.i.i
 
@@ -272,7 +272,7 @@ bb.i.i.i297.i.i: ; preds = %bb58.i.i
 unreachable
 
 lpad.i.i: ; preds = %lpad.i.i.i27.i.i
- %exn.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
@@ -312,67 +312,67 @@ bb29.loopexit.i.i: ; preds = %.noexc232.i
 br label %bb9.i216.i
 
 lpad.i: ; preds = %entry
- %exn.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad120.i: ; preds = %invcont.i
- %exn120.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn120.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad124.i: ; preds = %invcont1.i
- %exn124.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn124.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad128.i: ; preds = %invcont3.i
- %exn128.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn128.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad132.i: ; preds = %invcont4.i
- %exn132.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn132.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad136.i: ; preds = %invcont6.i
- %exn136.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn136.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad140.i: ; preds = %bb21.i, %invcont7.i
- %exn140.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn140.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad144.i: ; preds = %bb10.i168.i, %invcont9.i
- %exn144.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn144.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad148.i: ; preds = %invcont10.i
- %exn148.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn148.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad188.i: ; preds = %bb50.i.i.i
- %exn188.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn188.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad196.i: ; preds = %bb.i191.i
- %exn196 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn196 = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad200.i: ; preds = %bb50.i.i
- %exn200.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn200.i = landingpad {i8*, i32}
 cleanup
 unreachable
 
 lpad204.i: ; preds = %invcont86.i
- %exn204.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ %exn204.i = landingpad {i8*, i32}
 cleanup
 unreachable
 }
diff --git a/test/CodeGen/X86/seh-catch-all-win32.ll
b/test/CodeGen/X86/seh-catch-all-win32.ll new file mode 100644 index 000000000000..28b0bca962ea --- /dev/null +++ b/test/CodeGen/X86/seh-catch-all-win32.ll @@ -0,0 +1,85 @@ +; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s + +; 32-bit catch-all has to use a filter function because that's how it saves the +; exception code. + +@str = linkonce_odr unnamed_addr constant [27 x i8] c"GetExceptionCode(): 0x%lx\0A\00", align 1 + +declare i32 @_except_handler3(...) +declare void @crash() +declare i32 @printf(i8* nocapture readonly, ...) nounwind +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @llvm.frameaddress(i32) +declare i8* @llvm.framerecover(i8*, i8*, i32) +declare void @llvm.frameescape(...) +declare i8* @llvm.x86.seh.exceptioninfo(i8*, i8*) + +define i32 @main() personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) { +entry: + %__exceptioncode = alloca i32, align 4 + call void (...) @llvm.frameescape(i32* %__exceptioncode) + invoke void @crash() #5 + to label %__try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + catch i8* bitcast (i32 ()* @"filt$main" to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"filt$main" to i8*)) #4 + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %__except, label %eh.resume + +__except: ; preds = %lpad + %3 = load i32, i32* %__exceptioncode, align 4 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %3) #4 + br label %__try.cont + +__try.cont: ; preds = %entry, %__except + ret i32 0 + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %0 +} + +define internal i32 @"filt$main"() { +entry: + %0 = tail call i8* @llvm.frameaddress(i32 1) + %1 = tail call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %0, i32 0) + %__exceptioncode = bitcast i8* %1 to i32* + %2 = tail call i8* @llvm.x86.seh.exceptioninfo(i8* bitcast (i32 ()* @main to i8*), i8* %0) + %3 = bitcast i8* %2 to i32** + %4 = load i32*, i32** %3, align 4 + %5 = load i32, i32* %4, align 4 + store i32 %5, i32* %__exceptioncode, align 4 + ret i32 1 +} + +; Check that we can get the exception code from eax to the printf. + +; CHECK-LABEL: _main: +; CHECK: Lmain$frame_escape_0 = [[code_offs:[-0-9]+]] +; CHECK: Lmain$frame_escape_1 = [[reg_offs:[-0-9]+]] +; CHECK: movl %esp, [[reg_offs]](%ebp) +; CHECK: movl $L__ehtable$main, +; EH state 0 +; CHECK: movl $0, -4(%ebp) +; CHECK: calll _crash +; CHECK: retl +; CHECK: # Block address taken +; stackrestore +; CHECK: movl [[reg_offs]](%ebp), %esp +; EH state -1 +; CHECK: movl [[code_offs]](%ebp), %[[code:[a-z]+]] +; CHECK: movl $-1, -4(%ebp) +; CHECK-DAG: movl %[[code]], 4(%esp) +; CHECK-DAG: movl $_str, (%esp) +; CHECK: calll _printf + +; CHECK: .section .xdata,"dr" +; CHECK: L__ehtable$main +; CHECK-NEXT: .long -1 +; CHECK-NEXT: .long _filt$main +; CHECK-NEXT: .long Ltmp{{[0-9]+}} + +; CHECK-LABEL: _filt$main: +; CHECK: movl diff --git a/test/CodeGen/X86/seh-catch-all.ll b/test/CodeGen/X86/seh-catch-all.ll index 51840134eda3..1c1a3c2139d6 100644 --- a/test/CodeGen/X86/seh-catch-all.ll +++ b/test/CodeGen/X86/seh-catch-all.ll @@ -6,13 +6,13 @@ declare i32 @__C_specific_handler(...) declare void @crash() declare i32 @printf(i8* nocapture readonly, ...) 
nounwind -define i32 @main() { +define i32 @main() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: invoke void @crash() to label %__try.cont unwind label %lpad lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %0 = landingpad { i8*, i32 } catch i8* null %1 = extractvalue { i8*, i32 } %0, 0 %2 = ptrtoint i8* %1 to i64 @@ -30,6 +30,7 @@ eh.resume: ; Check that we can get the exception code from eax to the printf. ; CHECK-LABEL: main: +; CHECK: callq crash ; CHECK: retq ; CHECK: # Block address taken ; CHECK: leaq str(%rip), %rcx @@ -38,7 +39,7 @@ eh.resume: ; CHECK: .seh_handlerdata ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL -; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL+1 -; CHECK-NEXT: 1 -; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL +; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL +; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL+1 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL diff --git a/test/CodeGen/X86/seh-except-finally.ll b/test/CodeGen/X86/seh-except-finally.ll index c796f1ef2888..4327a64468f9 100644 --- a/test/CodeGen/X86/seh-except-finally.ll +++ b/test/CodeGen/X86/seh-except-finally.ll @@ -33,7 +33,7 @@ declare void @crash() declare i32 @filt() ; Function Attrs: nounwind uwtable -define void @use_both() #1 { +define void @use_both() #1 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: %exn.slot = alloca i8* %ehselector.slot = alloca i32 @@ -49,7 +49,7 @@ invoke.cont2: ; preds = %invoke.cont br label %__try.cont lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %1 = landingpad { i8*, i32 } cleanup catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*) %2 = extractvalue { i8*, i32 } %1, 0 @@ -61,7 +61,7 @@ lpad: ; preds = %entry to label %invoke.cont3 unwind label %lpad1 lpad1: ; preds = %lpad, %invoke.cont - %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %5 = landingpad { i8*, i32 } catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*) %6 = extractvalue { i8*, i32 } %5, 0 store i8* %6, i8** %exn.slot diff --git a/test/CodeGen/X86/seh-filter.ll b/test/CodeGen/X86/seh-filter.ll index 6a3a23edb1ae..37ed15841a93 100644 --- a/test/CodeGen/X86/seh-filter.ll +++ b/test/CodeGen/X86/seh-filter.ll @@ -1,14 +1,14 @@ ; RUN: llc -O0 -mtriple=x86_64-windows-msvc < %s | FileCheck %s declare void @g() -define void @f() { +define void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { invoke void @g() to label %return unwind label %lpad return: ret void lpad: - %ehptrs = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %ehptrs = landingpad {i8*, i32} filter [0 x i8*] zeroinitializer call void @__cxa_call_unexpected(i8* null) unreachable diff --git a/test/CodeGen/X86/seh-finally.ll b/test/CodeGen/X86/seh-finally.ll index 91baed570f25..350cd932f481 100644 --- a/test/CodeGen/X86/seh-finally.ll +++ b/test/CodeGen/X86/seh-finally.ll @@ -1,10 +1,12 @@ -; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64 +; RUN: sed -e 's/__C_specific_handler/_except_handler3/' %s | \ +; RUN: llc -mtriple=i686-windows-msvc | FileCheck %s --check-prefix=X86 @str_recovered = internal unnamed_addr constant [10 x i8] c"recovered\00", align 1 declare void @crash() -define i32 @main() { +define i32 @main() 
personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: invoke void @crash() to label %invoke.cont unwind label %lpad @@ -15,7 +17,7 @@ invoke.cont: ; preds = %entry ret i32 0 lpad: ; preds = %entry - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %0 = landingpad { i8*, i32 } cleanup %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 @@ -26,23 +28,38 @@ invoke.cont1: ; preds = %lpad resume { i8*, i32 } %0 terminate.lpad: ; preds = %lpad - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %3 = landingpad { i8*, i32 } catch i8* null call void @abort() unreachable } -; CHECK-LABEL: main: -; CHECK: .seh_handlerdata -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL -; CHECK-NEXT: .long main.cleanup@IMGREL -; CHECK-NEXT: .long 0 - -; CHECK-LABEL: main.cleanup: -; CHECK: callq puts -; CHECK: retq +; X64-LABEL: main: +; X64: retq + +; X64: .seh_handlerdata +; X64-NEXT: .long 1 +; X64-NEXT: .long .Ltmp0@IMGREL +; X64-NEXT: .long .Ltmp1@IMGREL +; X64-NEXT: .long main.cleanup@IMGREL +; X64-NEXT: .long 0 + +; X64-LABEL: main.cleanup: +; X64: callq puts +; X64: retq + +; X86-LABEL: _main: +; X86: retl + +; X86: .section .xdata,"dr" +; X86: L__ehtable$main: +; X86-NEXT: .long -1 +; X86-NEXT: .long 0 +; X86-NEXT: .long _main.cleanup + +; X86-LABEL: _main.cleanup: +; X86: calll _puts +; X86: retl declare i32 @__C_specific_handler(...) diff --git a/test/CodeGen/X86/seh-safe-div-win32.ll b/test/CodeGen/X86/seh-safe-div-win32.ll new file mode 100644 index 000000000000..0f76ec07a6b6 --- /dev/null +++ b/test/CodeGen/X86/seh-safe-div-win32.ll @@ -0,0 +1,172 @@ +; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s + +; This test case is also intended to be run manually as a complete functional +; test. It should link, print something, and exit zero rather than crashing. 
+; It is the hypothetical lowering of a C source program that looks like: +; +; int safe_div(int *n, int *d) { +; int r; +; __try { +; __try { +; r = *n / *d; +; } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) { +; puts("EXCEPTION_ACCESS_VIOLATION"); +; r = -1; +; } +; } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) { +; puts("EXCEPTION_INT_DIVIDE_BY_ZERO"); +; r = -2; +; } +; return r; +; } + +@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00" +@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00" + +define i32 @safe_div(i32* %n, i32* %d) personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) { +entry: + %r = alloca i32, align 4 + store i32 42, i32* %r + invoke void @try_body(i32* %r, i32* %n, i32* %d) + to label %__try.cont unwind label %lpad + +lpad: + %vals = landingpad { i8*, i32 } + catch i8* bitcast (i32 ()* @safe_div_filt0 to i8*) + catch i8* bitcast (i32 ()* @safe_div_filt1 to i8*) + %ehptr = extractvalue { i8*, i32 } %vals, 0 + %sel = extractvalue { i8*, i32 } %vals, 1 + %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @safe_div_filt0 to i8*)) + %is_filt0 = icmp eq i32 %sel, %filt0_val + br i1 %is_filt0, label %handler0, label %eh.dispatch1 + +eh.dispatch1: + %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @safe_div_filt1 to i8*)) + %is_filt1 = icmp eq i32 %sel, %filt1_val + br i1 %is_filt1, label %handler1, label %eh.resume + +handler0: + call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0)) + store i32 -1, i32* %r, align 4 + br label %__try.cont + +handler1: + call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0)) + store i32 -2, i32* %r, align 4 + br label %__try.cont + +eh.resume: + resume { i8*, i32 } %vals + +__try.cont: + %safe_ret = load i32, i32* %r, align 4 + ret i32 %safe_ret +} + +; Normal path code + +; CHECK: {{^}}_safe_div: +; CHECK: movl $42, [[rloc:.*\(%ebp\)]] +; CHECK: leal [[rloc]], +; CHECK: calll _try_body +; CHECK: [[cont_bb:LBB0_[0-9]+]]: +; CHECK: movl [[rloc]], %eax +; CHECK: retl + +; Landing pad code + +; CHECK: [[handler0:Ltmp[0-9]+]]: # Block address taken +; CHECK: # %handler0 +; Restore SP +; CHECK: movl {{.*}}(%ebp), %esp +; CHECK: calll _puts +; CHECK: jmp [[cont_bb]] + +; CHECK: [[handler1:Ltmp[0-9]+]]: # Block address taken +; CHECK: # %handler1 +; Restore SP +; CHECK: movl {{.*}}(%ebp), %esp +; CHECK: calll _puts +; CHECK: jmp [[cont_bb]] + +; CHECK: .section .xdata,"dr" +; CHECK: L__ehtable$safe_div: +; CHECK-NEXT: .long -1 +; CHECK-NEXT: .long _safe_div_filt1 +; CHECK-NEXT: .long [[handler1]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long _safe_div_filt0 +; CHECK-NEXT: .long [[handler0]] + +define void @try_body(i32* %r, i32* %n, i32* %d) { +entry: + %0 = load i32, i32* %n, align 4 + %1 = load i32, i32* %d, align 4 + %div = sdiv i32 %0, %1 + store i32 %div, i32* %r, align 4 + ret void +} + +; The prototype of these filter functions is: +; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp); + +; The definition of EXCEPTION_POINTERS is: +; typedef struct _EXCEPTION_POINTERS { +; EXCEPTION_RECORD *ExceptionRecord; +; CONTEXT *ContextRecord; +; } EXCEPTION_POINTERS; + +; The definition of EXCEPTION_RECORD is: +; typedef struct _EXCEPTION_RECORD { +; DWORD ExceptionCode; +; ... +; } EXCEPTION_RECORD; + +; FIXME: Use llvm.eh.exceptioninfo for this. 
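+;
+; A minimal sketch of what such a filter could look like once that FIXME is
+; resolved, reusing the @llvm.frameaddress and @llvm.x86.seh.exceptioninfo
+; declarations from seh-catch-all-win32.ll above (the @safe_div_filt0_sketch
+; name is hypothetical and not part of this patch):
+;
+;   define i32 @safe_div_filt0_sketch() {
+;     %fp = call i8* @llvm.frameaddress(i32 1)   ; parent frame of the filter
+;     %info = call i8* @llvm.x86.seh.exceptioninfo(i8* bitcast (i32 (i32*, i32*)* @safe_div to i8*), i8* %fp)
+;     %rec.ptr = bitcast i8* %info to i32**
+;     %rec = load i32*, i32** %rec.ptr           ; EXCEPTION_POINTERS->ExceptionRecord
+;     %code = load i32, i32* %rec                ; EXCEPTION_RECORD.ExceptionCode
+;     %cmp = icmp eq i32 %code, 3221225477       ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
+;     %res = zext i1 %cmp to i32
+;     ret i32 %res
+;   }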
+declare i32 @safe_div_filt0() +declare i32 @safe_div_filt1() +; define i32 @safe_div_filt0() { +; %eh_ptrs_c = bitcast i8* %eh_ptrs to i32** +; %eh_rec = load i32*, i32** %eh_ptrs_c +; %eh_code = load i32, i32* %eh_rec +; ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005 +; %cmp = icmp eq i32 %eh_code, 3221225477 +; %filt.res = zext i1 %cmp to i32 +; ret i32 %filt.res +; } +; define i32 @safe_div_filt1() { +; %eh_ptrs_c = bitcast i8* %eh_ptrs to i32** +; %eh_rec = load i32*, i32** %eh_ptrs_c +; %eh_code = load i32, i32* %eh_rec +; ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094 +; %cmp = icmp eq i32 %eh_code, 3221225620 +; %filt.res = zext i1 %cmp to i32 +; ret i32 %filt.res +; } + +@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00" + +define i32 @main() { + %d.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + + store i32 10, i32* %n.addr, align 4 + store i32 2, i32* %d.addr, align 4 + %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr) + call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r1) + + store i32 10, i32* %n.addr, align 4 + store i32 0, i32* %d.addr, align 4 + %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr) + call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r2) + + %r3 = call i32 @safe_div(i32* %n.addr, i32* null) + call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r3) + ret i32 0 +} + +declare i32 @_except_handler3(...) +declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind +declare void @puts(i8*) +declare void @printf(i8*, ...) +declare void @abort() diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll index 80b15b601020..699e58ee8bae 100644 --- a/test/CodeGen/X86/seh-safe-div.ll +++ b/test/CodeGen/X86/seh-safe-div.ll @@ -23,14 +23,14 @@ @str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00" @str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00" -define i32 @safe_div(i32* %n, i32* %d) { +define i32 @safe_div(i32* %n, i32* %d) personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: %r = alloca i32, align 4 invoke void @try_body(i32* %r, i32* %n, i32* %d) to label %__try.cont unwind label %lpad lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) + %vals = landingpad { i8*, i32 } catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*) catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*) %ehptr = extractvalue { i8*, i32 } %vals, 0 diff --git a/test/CodeGen/X86/setjmp-spills.ll b/test/CodeGen/X86/setjmp-spills.ll index c35caae97af6..43136e018c88 100644 --- a/test/CodeGen/X86/setjmp-spills.ll +++ b/test/CodeGen/X86/setjmp-spills.ll @@ -78,7 +78,7 @@ second: ; This is the same as above, but using "invoke" rather than "call" to ; call setjmp(). 
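;
; (The -/+ pair on @setjmp_invoker below is the one mechanical rewrite this
; commit applies throughout: the personality is no longer an operand of the
; landingpad instruction but an attribute of the enclosing function. As a
; minimal before/after sketch, using this file's @personality on a
; hypothetical @f:
;
;   before:  define void @f() {
;              ...
;            lpad:
;              %lp = landingpad { i8*, i32 } personality void ()* @personality cleanup
;
;   after:   define void @f() personality void ()* @personality {
;              ...
;            lpad:
;              %lp = landingpad { i8*, i32 } cleanup
;
; the landingpad itself keeps only its cleanup/catch/filter clauses.)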
-define void @setjmp_invoker() { +define void @setjmp_invoker() personality void ()* @personality { ; X86-32-LABEL: setjmp_invoker: ; X86-64-LABEL: setjmp_invoker: %a1 = call i32 @get_val() @@ -103,7 +103,7 @@ cont: br i1 %setjmp_result, label %second, label %first lpad: - %lp = landingpad { i8*, i32 } personality void ()* @personality cleanup + %lp = landingpad { i8*, i32 } cleanup unreachable first: diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll index 852214e7c248..82dd3b7674f9 100644 --- a/test/CodeGen/X86/split-eh-lpad-edges.ll +++ b/test/CodeGen/X86/split-eh-lpad-edges.ll @@ -10,7 +10,7 @@ %struct.objc_selector = type opaque @"\01l_objc_msgSend_fixup_alloc" = external global %struct._message_ref_t, align 16 ; <%struct._message_ref_t*> [#uses=2] -define %struct.NSArray* @newFetchedRowsForFetchPlan_MT(%struct.FetchPlanHeader* %fetchPlan, %struct.objc_selector* %selectionMethod, %struct.NSObject* %selectionParameter) ssp { +define %struct.NSArray* @newFetchedRowsForFetchPlan_MT(%struct.FetchPlanHeader* %fetchPlan, %struct.objc_selector* %selectionMethod, %struct.NSObject* %selectionParameter) ssp personality i32 (...)* @__gxx_personality_v0 { entry: %0 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc") to label %invcont unwind label %lpad ; <%struct.NSObject*> [#uses=1] @@ -28,7 +28,7 @@ invcont27: ; preds = %invcont26 lpad: ; preds = %invcont26, %invcont, %entry %pool.1 = phi %struct.NSAutoreleasePool* [ null, %entry ], [ null, %invcont ], [ null, %invcont26 ] ; <%struct.NSAutoreleasePool*> [#uses=0] - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + %exn = landingpad {i8*, i32} cleanup unreachable } diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll index acaba6dc17f8..398b8548747b 100644 --- a/test/CodeGen/X86/stack-protector.ll +++ b/test/CodeGen/X86/stack-protector.ll @@ -2097,7 +2097,7 @@ entry: ; test18a: Addr-of a variable passed into an invoke instruction. ; no ssp attribute ; Requires no protector. -define i32 @test18a() { +define i32 @test18a() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test18a: ; LINUX-I386-NOT: calll __stack_chk_fail @@ -2125,7 +2125,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2134,7 +2134,7 @@ lpad: ; ssp attribute ; Requires no protector. ; Function Attrs: ssp -define i32 @test18b() #0 { +define i32 @test18b() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test18b: ; LINUX-I386-NOT: calll __stack_chk_fail @@ -2162,7 +2162,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2171,7 +2171,7 @@ lpad: ; sspstrong attribute ; Requires protector. 
; Function Attrs: sspstrong -define i32 @test18c() #1 { +define i32 @test18c() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test18c: ; LINUX-I386: mov{{l|q}} %gs: @@ -2199,7 +2199,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2208,7 +2208,7 @@ lpad: ; sspreq attribute ; Requires protector. ; Function Attrs: sspreq -define i32 @test18d() #2 { +define i32 @test18d() #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test18d: ; LINUX-I386: mov{{l|q}} %gs: @@ -2236,7 +2236,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2244,7 +2244,7 @@ lpad: ; (GEP followed by an invoke) ; no ssp attribute ; Requires no protector. -define i32 @test19a() { +define i32 @test19a() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test19a: ; LINUX-I386-NOT: calll __stack_chk_fail @@ -2274,7 +2274,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2284,7 +2284,7 @@ lpad: ; ssp attribute ; Requires no protector. ; Function Attrs: ssp -define i32 @test19b() #0 { +define i32 @test19b() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test19b: ; LINUX-I386-NOT: calll __stack_chk_fail @@ -2314,7 +2314,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2324,7 +2324,7 @@ lpad: ; sspstrong attribute ; Requires protector. ; Function Attrs: sspstrong -define i32 @test19c() #1 { +define i32 @test19c() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test19c: ; LINUX-I386: mov{{l|q}} %gs: @@ -2354,7 +2354,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } @@ -2364,7 +2364,7 @@ lpad: ; sspreq attribute ; Requires protector. 
; Function Attrs: sspreq -define i32 @test19d() #2 { +define i32 @test19d() #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: ; LINUX-I386-LABEL: test19d: ; LINUX-I386: mov{{l|q}} %gs: @@ -2398,7 +2398,7 @@ invoke.cont: ret i32 0 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null ret i32 0 } diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll index df78978c117c..81b9ab89ebca 100644 --- a/test/CodeGen/X86/statepoint-invoke.ll +++ b/test/CodeGen/X86/statepoint-invoke.ll @@ -9,7 +9,7 @@ declare i32 @"personality_function"() define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) -gc "statepoint-example" { +gc "statepoint-example" personality i32 ()* @"personality_function" { entry: ; CHECK: Ltmp{{[0-9]+}}: ; CHECK: callq some_call @@ -31,7 +31,7 @@ exceptional_return: ; CHECK: Ltmp{{[0-9]+}}: ; CHECK: movq ; CHECK: retq - %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + %landing_pad = landingpad { i8*, i32 } cleanup %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) @@ -46,7 +46,7 @@ exceptional_return: define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) - gc "statepoint-example" { + gc "statepoint-example" personality i32 ()* @personality_function { entry: ; CHECK: .Ltmp{{[0-9]+}}: ; CHECK: callq some_other_call @@ -63,7 +63,7 @@ normal_return: exceptional_return: ; CHECK: .Ltmp{{[0-9]+}}: ; CHECK: movq - %landing_pad = landingpad { i8*, i32 } personality i32 ()* @personality_function + %landing_pad = landingpad { i8*, i32 } cleanup %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) @@ -76,7 +76,7 @@ exceptional_return: ; CHECK: .align 4 define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) - gc "statepoint-example" { + gc "statepoint-example" personality i32 ()* @"personality_function" { entry: br i1 %cond, label %left, label %right @@ -120,14 +120,14 @@ normal_return: ret i64 addrspace(1)* %ret exceptional_return.left: - %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + %landing_pad = landingpad { i8*, i32 } cleanup %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) ret i64 addrspace(1)* %val.relocated2 exceptional_return.right: - %landing_pad1 = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + %landing_pad1 = landingpad { i8*, i32 } cleanup %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1 %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 13, i32 13) @@ -135,7 +135,7 @@ exceptional_return.right: } define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) - gc "statepoint-example" { + gc "statepoint-example" personality i32 ()* @"personality_function" { ; CHECK-LABEL: test_null_undef: entry: ; CHECK: callq some_call @@ -152,7 +152,7 @@ normal_return: ret i64 addrspace(1)* %null.relocated exceptional_return: - %landing_pad = landingpad { 
i8*, i32 } personality i32 ()* @"personality_function" + %landing_pad = landingpad { i8*, i32 } cleanup %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) @@ -161,7 +161,7 @@ exceptional_return: } define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) - gc "statepoint-example" { + gc "statepoint-example" personality i32 ()* @"personality_function" { ; CHECK-LABEL: test_alloca_and_const: entry: %a = alloca i32 @@ -183,7 +183,7 @@ exceptional_return: ; CHECK: movl $15 ; CHECK-NEXT: popq ; CHECK-NEXT: retq - %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + %landing_pad = landingpad { i8*, i32 } cleanup %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14) diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll index 02d20c9fcb96..a4aa747af8cf 100644 --- a/test/CodeGen/X86/statepoint-stack-usage.ll +++ b/test/CodeGen/X86/statepoint-stack-usage.ll @@ -14,6 +14,8 @@ define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 a ; CHECK: movq %rdi, 16(%rsp) ; CHECK: movq %rdx, 8(%rsp) ; CHECK: movq %rsi, (%rsp) +; There should be no more than three moves +; CHECK-NOT: movq %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12) %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13) @@ -52,9 +54,53 @@ define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrsp ret i32 1 } +; Test that stack slots are reused for invokes +define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" personality i32 ()* @"personality_function" { +; CHECK-LABEL: back_to_back_invokes +entry: + ; The exact stores don't matter, but there need to be three stack slots created + ; CHECK: movq %rdi, 16(%rsp) + ; CHECK: movq %rdx, 8(%rsp) + ; CHECK: movq %rsi, (%rsp) + ; CHECK: callq + %safepoint_token = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) + to label %normal_return unwind label %exceptional_return + +normal_return: + %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12) + %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13) + %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14) + ; Should work even through bitcasts + %c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)* + ; This is the key check. There should NOT be any memory moves here + ; CHECK-NOT: movq + ; CHECK: callq + %safepoint_token2 = invoke i32 (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1) + to label %normal_return2 unwind label %exceptional_return2 + +normal_return2: + %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14) + %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13) + %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token2, i32 12, i32 12) + ret i32 1 + +exceptional_return: + %landing_pad = landingpad { i8*, i32 } + cleanup + ret i32 0 + +exceptional_return2: + %landing_pad2 = landingpad { i8*, i32 } + cleanup + ret i32 0 +} + ; Function Attrs: nounwind declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3 +declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32) #3 declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) -attributes #1 = { uwtable }
\ No newline at end of file +declare i32 @"personality_function"() + +attributes #1 = { uwtable } diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll index a4dece65479c..748fd6f238b1 100644 --- a/test/CodeGen/X86/switch.ll +++ b/test/CodeGen/X86/switch.ll @@ -16,23 +16,18 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 1) br label %return return: ret void -; Should be lowered as straight compares in -O0 mode. -; NOOPT-LABEL: basic -; NOOPT: subl $1, %eax -; NOOPT: je -; NOOPT: subl $3, %eax -; NOOPT: je -; NOOPT: subl $4, %eax -; NOOPT: je -; NOOPT: subl $5, %eax -; NOOPT: je - -; Jump table otherwise. +; Lowered as a jump table, both with and without optimization. ; CHECK-LABEL: basic ; CHECK: decl ; CHECK: cmpl $4 ; CHECK: ja ; CHECK: jmpq *.LJTI +; NOOPT-LABEL: basic +; NOOPT: decl +; NOOPT: subl $4 +; NOOPT: ja +; NOOPT: movq .LJTI +; NOOPT: jmpq } @@ -205,6 +200,21 @@ return: ret void ; CHECK: leal -5 ; CHECK: cmpl $10 ; CHECK: jmpq *.LJTI + +; At -O0, we don't build jump tables for only parts of a switch. +; NOOPT-LABEL: optimal_jump_table1 +; NOOPT: testl %edi, %edi +; NOOPT: je +; NOOPT: subl $5, %eax +; NOOPT: je +; NOOPT: subl $6, %eax +; NOOPT: je +; NOOPT: subl $12, %eax +; NOOPT: je +; NOOPT: subl $13, %eax +; NOOPT: je +; NOOPT: subl $15, %eax +; NOOPT: je } @@ -489,6 +499,8 @@ entry: i32 30, label %bb3 i32 40, label %bb4 i32 50, label %bb5 + i32 60, label %bb6 + i32 70, label %bb6 ], !prof !4 bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return @@ -496,16 +508,87 @@ bb2: tail call void @g(i32 2) br label %return bb3: tail call void @g(i32 3) br label %return bb4: tail call void @g(i32 4) br label %return bb5: tail call void @g(i32 5) br label %return +bb6: tail call void @g(i32 6) br label %return +bb7: tail call void @g(i32 7) br label %return return: ret void -; To balance the tree by weight, the pivot is shifted to the right, moving hot -; cases closer to the root. +; Without branch probabilities, the pivot would be 40, since that would yield +; equal-sized sub-trees. When taking weights into account, case 70 becomes the +; pivot. Since there is room for 3 cases in a leaf, cases 50 and 60 are also +; included in the right-hand side because that doesn't reduce their rank. + ; CHECK-LABEL: left_leaning_weight_balanced_tree ; CHECK-NOT: cmpl -; CHECK: cmpl $39 +; CHECK: cmpl $49 +} + +!4 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1000} + + +define void @left_leaning_weight_balanced_tree2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 10, label %bb1 + i32 20, label %bb2 + i32 30, label %bb3 + i32 40, label %bb4 + i32 50, label %bb5 + i32 60, label %bb6 + i32 70, label %bb6 + ], !prof !5 +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +bb5: tail call void @g(i32 5) br label %return +bb6: tail call void @g(i32 6) br label %return +bb7: tail call void @g(i32 7) br label %return +return: ret void + +; Same as the previous test, except case 50 has higher rank to the left than it +; would have on the right. Case 60 would have the same rank on both sides, so is +; moved into the leaf. 
+ +; CHECK-LABEL: left_leaning_weight_balanced_tree2 +; CHECK-NOT: cmpl +; CHECK: cmpl $59 +} + +!5 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 90, i32 70, i32 1000} + + +define void @right_leaning_weight_balanced_tree(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 10, label %bb1 + i32 20, label %bb2 + i32 30, label %bb3 + i32 40, label %bb4 + i32 50, label %bb5 + i32 60, label %bb6 + i32 70, label %bb6 + ], !prof !6 +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +bb5: tail call void @g(i32 5) br label %return +bb6: tail call void @g(i32 6) br label %return +bb7: tail call void @g(i32 7) br label %return +return: ret void + +; Analogous to left_leaning_weight_balanced_tree. + +; CHECK-LABEL: right_leaning_weight_balanced_tree +; CHECK-NOT: cmpl +; CHECK: cmpl $19 } -!4 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 10, i32 10} +!6 = !{!"branch_weights", i32 1, i32 1000, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 10} define void @jump_table_affects_balance(i32 %x) { diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll index b337a80b84b3..d979c16f4abd 100644 --- a/test/CodeGen/X86/unaligned-32-byte-memops.ll +++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll @@ -1,66 +1,72 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s --check-prefix=SANDYB --check-prefix=CHECK -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx-i | FileCheck %s --check-prefix=SANDYB --check-prefix=CHECK -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 --check-prefix=CHECK -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s --check-prefix=HASWELL --check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXSLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXFAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2 -; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte load -; because that is slower than two 16-byte loads. -; Other AVX-capable chips don't have that problem. +; Don't generate an unaligned 32-byte load on this test if that is slower than two 16-byte loads. define <8 x float> @load32bytes(<8 x float>* %Ap) { - ; CHECK-LABEL: load32bytes - - ; SANDYB: vmovaps - ; SANDYB: vinsertf128 - ; SANDYB: retq - - ; BTVER2: vmovups - ; BTVER2: retq - - ; HASWELL: vmovups - ; HASWELL: retq - +; AVXSLOW-LABEL: load32bytes: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vmovaps (%rdi), %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: load32bytes: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vmovups (%rdi), %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: load32bytes: +; AVX2: # BB#0: +; AVX2-NEXT: vmovups (%rdi), %ymm0 +; AVX2-NEXT: retq %A = load <8 x float>, <8 x float>* %Ap, align 16 ret <8 x float> %A } -; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte store -; because that is slowerthan two 16-byte stores. -; Other AVX-capable chips don't have that problem. 
+; Don't generate an unaligned 32-byte store on this test if that is slower than two 16-byte stores.

 define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
-  ; CHECK-LABEL: store32bytes
-
-  ; SANDYB: vextractf128
-  ; SANDYB: vmovaps
-  ; SANDYB: retq
-
-  ; BTVER2: vmovups
-  ; BTVER2: retq
-
-  ; HASWELL: vmovups
-  ; HASWELL: retq
-
+; AVXSLOW-LABEL: store32bytes:
+; AVXSLOW: # BB#0:
+; AVXSLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi)
+; AVXSLOW-NEXT: vmovaps %xmm0, (%rdi)
+; AVXSLOW-NEXT: vzeroupper
+; AVXSLOW-NEXT: retq
+;
+; AVXFAST-LABEL: store32bytes:
+; AVXFAST: # BB#0:
+; AVXFAST-NEXT: vmovups %ymm0, (%rdi)
+; AVXFAST-NEXT: vzeroupper
+; AVXFAST-NEXT: retq
+;
+; AVX2-LABEL: store32bytes:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
   store <8 x float> %A, <8 x float>* %P, align 16
   ret void
 }

-; Merge two consecutive 16-byte subvector loads into a single 32-byte load
-; if it's faster.
+; Merge two consecutive 16-byte subvector loads into a single 32-byte load if it's faster.

 define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
-  ; CHECK-LABEL: combine_16_byte_loads_no_intrinsic
-
-  ; SANDYB: vmovups
-  ; SANDYB-NEXT: vinsertf128
-  ; SANDYB-NEXT: retq
-
-  ; BTVER2: vmovups
-  ; BTVER2-NEXT: retq
-
-  ; HASWELL: vmovups
-  ; HASWELL-NEXT: retq
-
+; AVXSLOW-LABEL: combine_16_byte_loads_no_intrinsic:
+; AVXSLOW: # BB#0:
+; AVXSLOW-NEXT: vmovups 48(%rdi), %xmm0
+; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT: retq
+;
+; AVXFAST-LABEL: combine_16_byte_loads_no_intrinsic:
+; AVXFAST: # BB#0:
+; AVXFAST-NEXT: vmovups 48(%rdi), %ymm0
+; AVXFAST-NEXT: retq
+;
+; AVX2-LABEL: combine_16_byte_loads_no_intrinsic:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovups 48(%rdi), %ymm0
+; AVX2-NEXT: retq
   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
   %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
   %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
@@ -69,21 +75,49 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
   ret <8 x float> %v3
 }

-; Swap the order of the shufflevector operands to ensure that the
-; pattern still matches.
-define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) {
-  ; CHECK-LABEL: combine_16_byte_loads_no_intrinsic_swap
-
-  ; SANDYB: vmovups
-  ; SANDYB-NEXT: vinsertf128
-  ; SANDYB-NEXT: retq
-
-  ; BTVER2: vmovups
-  ; BTVER2-NEXT: retq
+define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
+;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
+; AVXSLOW-LABEL: combine_16_byte_loads_aligned: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vmovaps 48(%rdi), %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_aligned: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vmovaps 48(%rdi), %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_aligned: +; AVX2: # BB#0: +; AVX2-NEXT: vmovaps 48(%rdi), %ymm0 +; AVX2-NEXT: retq + %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3 + %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4 + %v1 = load <4 x float>, <4 x float>* %ptr1, align 32 + %v2 = load <4 x float>, <4 x float>* %ptr2, align 1 + %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x float> %v3 +} - ; HASWELL: vmovups - ; HASWELL-NEXT: retq +; Swap the order of the shufflevector operands to ensure that the pattern still matches. +define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) { +; AVXSLOW-LABEL: combine_16_byte_loads_no_intrinsic_swap: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vmovups 64(%rdi), %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, 80(%rdi), %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_no_intrinsic_swap: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vmovups 64(%rdi), %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_no_intrinsic_swap: +; AVX2: # BB#0: +; AVX2-NEXT: vmovups 64(%rdi), %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4 %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5 %v1 = load <4 x float>, <4 x float>* %ptr1, align 1 @@ -94,28 +128,29 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) { ; Check each element type other than float to make sure it is handled correctly. ; Use the loaded values with an 'add' to make sure we're using the correct load type. -; Even though BtVer2 has fast 32-byte loads, we should not generate those for -; 256-bit integer vectors because BtVer2 doesn't have AVX2. +; Don't generate 32-byte loads for integer ops unless we have AVX2. 
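;
; (Context for the integer tests that follow: plain AVX has no 256-bit integer
; ALU, so a <4 x i64> add is split into two 128-bit vpaddq plus
; vextractf128/vinsertf128, while AVX2 uses a single ymm vpaddq. A minimal
; stand-alone reduction, with a hypothetical name, that shows the same split:
;
;   define <4 x i64> @add_v4i64_sketch(<4 x i64> %x, <4 x i64> %y) {
;     %s = add <4 x i64> %x, %y
;     ret <4 x i64> %s
;   }
;
; the AVXSLOW/AVXFAST CHECK lines below show the split form, and the AVX2
; lines the single vpaddq.)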
define <4 x i64> @combine_16_byte_loads_i64(<2 x i64>* %ptr, <4 x i64> %x) { - ; CHECK-LABEL: combine_16_byte_loads_i64 - - ; SANDYB: vextractf128 - ; SANDYB-NEXT: vpaddq - ; SANDYB-NEXT: vpaddq - ; SANDYB-NEXT: vinsertf128 - ; SANDYB-NEXT: retq - - ; BTVER2: vextractf128 - ; BTVER2-NEXT: vpaddq - ; BTVER2-NEXT: vpaddq - ; BTVER2-NEXT: vinsertf128 - ; BTVER2-NEXT: retq - - ; HASWELL-NOT: vextract - ; HASWELL: vpaddq - ; HASWELL-NEXT: retq - +; AVXSLOW-LABEL: combine_16_byte_loads_i64: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXSLOW-NEXT: vpaddq 96(%rdi), %xmm1, %xmm1 +; AVXSLOW-NEXT: vpaddq 80(%rdi), %xmm0, %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_i64: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXFAST-NEXT: vpaddq 96(%rdi), %xmm1, %xmm1 +; AVXFAST-NEXT: vpaddq 80(%rdi), %xmm0, %xmm0 +; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_i64: +; AVX2: # BB#0: +; AVX2-NEXT: vpaddq 80(%rdi), %ymm0, %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 5 %ptr2 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 6 %v1 = load <2 x i64>, <2 x i64>* %ptr1, align 1 @@ -126,24 +161,26 @@ define <4 x i64> @combine_16_byte_loads_i64(<2 x i64>* %ptr, <4 x i64> %x) { } define <8 x i32> @combine_16_byte_loads_i32(<4 x i32>* %ptr, <8 x i32> %x) { - ; CHECK-LABEL: combine_16_byte_loads_i32 - - ; SANDYB: vextractf128 - ; SANDYB-NEXT: vpaddd - ; SANDYB-NEXT: vpaddd - ; SANDYB-NEXT: vinsertf128 - ; SANDYB-NEXT: retq - - ; BTVER2: vextractf128 - ; BTVER2-NEXT: vpaddd - ; BTVER2-NEXT: vpaddd - ; BTVER2-NEXT: vinsertf128 - ; BTVER2-NEXT: retq - - ; HASWELL-NOT: vextract - ; HASWELL: vpaddd - ; HASWELL-NEXT: retq - +; AVXSLOW-LABEL: combine_16_byte_loads_i32: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXSLOW-NEXT: vpaddd 112(%rdi), %xmm1, %xmm1 +; AVXSLOW-NEXT: vpaddd 96(%rdi), %xmm0, %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_i32: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXFAST-NEXT: vpaddd 112(%rdi), %xmm1, %xmm1 +; AVXFAST-NEXT: vpaddd 96(%rdi), %xmm0, %xmm0 +; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpaddd 96(%rdi), %ymm0, %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 6 %ptr2 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 7 %v1 = load <4 x i32>, <4 x i32>* %ptr1, align 1 @@ -154,24 +191,26 @@ define <8 x i32> @combine_16_byte_loads_i32(<4 x i32>* %ptr, <8 x i32> %x) { } define <16 x i16> @combine_16_byte_loads_i16(<8 x i16>* %ptr, <16 x i16> %x) { - ; CHECK-LABEL: combine_16_byte_loads_i16 - - ; SANDYB: vextractf128 - ; SANDYB-NEXT: vpaddw - ; SANDYB-NEXT: vpaddw - ; SANDYB-NEXT: vinsertf128 - ; SANDYB-NEXT: retq - - ; BTVER2: vextractf128 - ; BTVER2-NEXT: vpaddw - ; BTVER2-NEXT: vpaddw - ; BTVER2-NEXT: vinsertf128 - ; BTVER2-NEXT: retq - - ; HASWELL-NOT: vextract - ; HASWELL: vpaddw - ; HASWELL-NEXT: retq - +; AVXSLOW-LABEL: combine_16_byte_loads_i16: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXSLOW-NEXT: vpaddw 128(%rdi), %xmm1, %xmm1 +; AVXSLOW-NEXT: vpaddw 112(%rdi), %xmm0, %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXSLOW-NEXT: 
retq +; +; AVXFAST-LABEL: combine_16_byte_loads_i16: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXFAST-NEXT: vpaddw 128(%rdi), %xmm1, %xmm1 +; AVXFAST-NEXT: vpaddw 112(%rdi), %xmm0, %xmm0 +; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpaddw 112(%rdi), %ymm0, %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 7 %ptr2 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 8 %v1 = load <8 x i16>, <8 x i16>* %ptr1, align 1 @@ -182,24 +221,26 @@ define <16 x i16> @combine_16_byte_loads_i16(<8 x i16>* %ptr, <16 x i16> %x) { } define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) { - ; CHECK-LABEL: combine_16_byte_loads_i8 - - ; SANDYB: vextractf128 - ; SANDYB-NEXT: vpaddb - ; SANDYB-NEXT: vpaddb - ; SANDYB-NEXT: vinsertf128 - ; SANDYB-NEXT: retq - - ; BTVER2: vextractf128 - ; BTVER2-NEXT: vpaddb - ; BTVER2-NEXT: vpaddb - ; BTVER2-NEXT: vinsertf128 - ; BTVER2-NEXT: retq - - ; HASWELL-NOT: vextract - ; HASWELL: vpaddb - ; HASWELL-NEXT: retq - +; AVXSLOW-LABEL: combine_16_byte_loads_i8: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXSLOW-NEXT: vpaddb 144(%rdi), %xmm1, %xmm1 +; AVXSLOW-NEXT: vpaddb 128(%rdi), %xmm0, %xmm0 +; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_i8: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVXFAST-NEXT: vpaddb 144(%rdi), %xmm1, %xmm1 +; AVXFAST-NEXT: vpaddb 128(%rdi), %xmm0, %xmm0 +; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_i8: +; AVX2: # BB#0: +; AVX2-NEXT: vpaddb 128(%rdi), %ymm0, %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 8 %ptr2 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 9 %v1 = load <16 x i8>, <16 x i8>* %ptr1, align 1 @@ -210,21 +251,22 @@ define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) { } define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x double> %x) { - ; CHECK-LABEL: combine_16_byte_loads_double - - ; SANDYB: vmovupd - ; SANDYB-NEXT: vinsertf128 - ; SANDYB-NEXT: vaddpd - ; SANDYB-NEXT: retq - - ; BTVER2-NOT: vinsertf128 - ; BTVER2: vaddpd - ; BTVER2-NEXT: retq - - ; HASWELL-NOT: vinsertf128 - ; HASWELL: vaddpd - ; HASWELL-NEXT: retq - +; AVXSLOW-LABEL: combine_16_byte_loads_double: +; AVXSLOW: # BB#0: +; AVXSLOW-NEXT: vmovupd 144(%rdi), %xmm1 +; AVXSLOW-NEXT: vinsertf128 $1, 160(%rdi), %ymm1, %ymm1 +; AVXSLOW-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVXSLOW-NEXT: retq +; +; AVXFAST-LABEL: combine_16_byte_loads_double: +; AVXFAST: # BB#0: +; AVXFAST-NEXT: vaddpd 144(%rdi), %ymm0, %ymm0 +; AVXFAST-NEXT: retq +; +; AVX2-LABEL: combine_16_byte_loads_double: +; AVX2: # BB#0: +; AVX2-NEXT: vaddpd 144(%rdi), %ymm0, %ymm0 +; AVX2-NEXT: retq %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 9 %ptr2 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 10 %v1 = load <2 x double>, <2 x double>* %ptr1, align 1 diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 5052ff51092e..8dded07af7d4 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 ; ; Signed Integer to Double @@ -34,12 +35,28 @@ define <2 x double> @sitofp_2vf64(<2 x i64> %a) { define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) { ; SSE2-LABEL: sitofp_2vf64_i32: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_2vf64_i32: +; AVX: # BB#0: +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: retq + %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %cvt = sitofp <2 x i32> %shuf to <2 x double> + ret <2 x double> %cvt +} + +define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) { +; SSE2-LABEL: sitofp_2vf64_i16: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: cltq +; SSE2-NEXT: movswq %ax, %rax ; SSE2-NEXT: movd %xmm0, %rcx -; SSE2-NEXT: movslq %ecx, %rcx +; SSE2-NEXT: movswq %cx, %rcx ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0 ; SSE2-NEXT: xorps %xmm1, %xmm1 @@ -47,20 +64,55 @@ define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) { ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; -; AVX-LABEL: sitofp_2vf64_i32: +; AVX-LABEL: sitofp_2vf64_i16: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: cltq +; AVX-NEXT: movswq %ax, %rax ; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movslq %ecx, %rcx +; AVX-NEXT: movswq %cx, %rcx ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq - %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - %cvt = sitofp <2 x i32> %shuf to <2 x double> + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> + %cvt = sitofp <2 x i16> %shuf to <2 x double> + ret <2 x double> %cvt +} + +define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) { +; SSE2-LABEL: sitofp_2vf64_i8: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: movd %xmm1, %rax +; SSE2-NEXT: movsbq %al, %rax +; SSE2-NEXT: movd %xmm0, %rcx +; SSE2-NEXT: movsbq %cl, %rcx +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: cvtsi2sdq %rax, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_2vf64_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vmovq %xmm0, %rax +; AVX-NEXT: movsbq %al, %rax +; AVX-NEXT: vpextrq $1, %xmm0, %rcx +; AVX-NEXT: movsbq %cl, %rcx +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vcvtsi2sdq 
%rcx, %xmm0, %xmm0 +; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> + %cvt = sitofp <2 x i8> %shuf to <2 x double> ret <2 x double> %cvt } @@ -85,22 +137,39 @@ define <4 x double> @sitofp_4vf64(<4 x i64> %a) { ; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; -; AVX-LABEL: sitofp_4vf64: -; AVX: # BB#0: -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrq $1, %xmm1, %rax -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: sitofp_4vf64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sitofp_4vf64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %cvt = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %cvt } @@ -108,28 +177,10 @@ define <4 x double> @sitofp_4vf64(<4 x i64> %a) { define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) { ; SSE2-LABEL: sitofp_4vf64_i32: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3] -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: cltq -; SSE2-NEXT: cvtsi2sdq %rax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: cltq -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sdq %rax, %xmm1 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; SSE2-NEXT: movd %xmm0, %rax -; SSE2-NEXT: cltq -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sdq %rax, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm0, %rax -; SSE2-NEXT: cltq -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2sdq %rax, %xmm0 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: sitofp_4vf64_i32: @@ -140,6 +191,47 @@ define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) { ret <4 x 
double> %cvt } +define <4 x double> @sitofp_4vf64_i16(<8 x i16> %a) { +; SSE2-LABEL: sitofp_4vf64_i16: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_4vf64_i16: +; AVX: # BB#0: +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX-NEXT: retq + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = sitofp <4 x i16> %shuf to <4 x double> + ret <4 x double> %cvt +} + +define <4 x double> @sitofp_4vf64_i8(<16 x i8> %a) { +; SSE2-LABEL: sitofp_4vf64_i8: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_4vf64_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = sitofp <4 x i8> %shuf to <4 x double> + ret <4 x double> %cvt +} + ; ; Unsigned Integer to Double ; @@ -216,6 +308,85 @@ define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) { ret <2 x double> %cvt } +define <2 x double> @uitofp_2vf64_i16(<8 x i16> %a) { +; SSE2-LABEL: uitofp_2vf64_i16: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] +; SSE2-NEXT: subpd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] +; SSE2-NEXT: addpd %xmm4, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: subpd %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] +; SSE2-NEXT: addpd %xmm2, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; AVX-LABEL: uitofp_2vf64_i16: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] +; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] +; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX-NEXT: retq + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> + %cvt = uitofp <2 x i16> %shuf to <2 x double> + ret <2 x double> %cvt +} + +define <2 x double> @uitofp_2vf64_i8(<16 x i8> %a) { +; SSE2-LABEL: uitofp_2vf64_i8: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; 
SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] +; SSE2-NEXT: subpd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] +; SSE2-NEXT: addpd %xmm4, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: subpd %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] +; SSE2-NEXT: addpd %xmm2, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; AVX-LABEL: uitofp_2vf64_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] +; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] +; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> + %cvt = uitofp <2 x i8> %shuf to <2 x double> + ret <2 x double> %cvt +} + define <4 x double> @uitofp_4vf64(<4 x i64> %a) { ; SSE2-LABEL: uitofp_4vf64: ; SSE2: # BB#0: @@ -243,29 +414,53 @@ define <4 x double> @uitofp_4vf64(<4 x i64> %a) { ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; -; AVX-LABEL: uitofp_4vf64: -; AVX: # BB#0: -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] -; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; AVX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] -; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3 -; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; AVX-NEXT: vsubpd %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0] -; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3 -; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; AVX-NEXT: vsubpd %xmm4, %xmm0, %xmm0 -; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: uitofp_4vf64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] +; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3 +; 
AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_4vf64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] +; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3 +; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3 +; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0 +; AVX2-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %cvt = uitofp <4 x i64> %a to <4 x double> ret <4 x double> %cvt } @@ -288,7 +483,66 @@ define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) { ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1] ; SSE2-NEXT: addpd %xmm1, %xmm5 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] -; SSE2-NEXT: pand .LCPI7_2(%rip), %xmm2 +; SSE2-NEXT: pand .LCPI13_2(%rip), %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: subpd %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] +; SSE2-NEXT: addpd %xmm2, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; SSE2-NEXT: subpd %xmm4, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1] +; SSE2-NEXT: addpd %xmm5, %xmm2 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: retq +; +; AVX1-LABEL: uitofp_4vf64_i32: +; AVX1: # BB#0: +; AVX1-NEXT: vpand .LCPI13_0(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX1-NEXT: vmulpd .LCPI13_1(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_4vf64_i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 +; AVX2-NEXT: vbroadcastsd .LCPI13_0(%rip), %ymm2 +; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpbroadcastd .LCPI13_1(%rip), %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %cvt = uitofp <4 x i32> %a 
to <4 x double> + ret <4 x double> %cvt +} + +define <4 x double> @uitofp_4vf64_i16(<8 x i16> %a) { +; SSE2-LABEL: uitofp_4vf64_i16: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] +; SSE2-NEXT: subpd %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1] +; SSE2-NEXT: addpd %xmm5, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE2-NEXT: subpd %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1] +; SSE2-NEXT: addpd %xmm1, %xmm5 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand .LCPI14_2(%rip), %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: subpd %xmm4, %xmm2 @@ -301,16 +555,60 @@ define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) { ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; -; AVX-LABEL: uitofp_4vf64_i32: +; AVX-LABEL: uitofp_4vf64_i16: ; AVX: # BB#0: -; AVX-NEXT: vpand .LCPI7_0(%rip), %xmm0, %xmm1 -; AVX-NEXT: vcvtdq2pd %xmm1, %ymm1 -; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX-NEXT: vmulpd .LCPI7_1(%rip), %ymm0, %ymm0 -; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq - %cvt = uitofp <4 x i32> %a to <4 x double> + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = uitofp <4 x i16> %shuf to <4 x double> + ret <4 x double> %cvt +} + +define <4 x double> @uitofp_4vf64_i8(<16 x i8> %a) { +; SSE2-LABEL: uitofp_4vf64_i8: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] +; SSE2-NEXT: subpd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1] +; SSE2-NEXT: addpd %xmm5, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; SSE2-NEXT: subpd %xmm3, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1] +; SSE2-NEXT: addpd %xmm4, %xmm5 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand .LCPI15_2(%rip), %xmm4 
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; SSE2-NEXT: subpd %xmm3, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,0,1] +; SSE2-NEXT: addpd %xmm4, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1] +; SSE2-NEXT: subpd %xmm3, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1] +; SSE2-NEXT: addpd %xmm5, %xmm2 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: retq +; +; AVX-LABEL: uitofp_4vf64_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = uitofp <4 x i8> %shuf to <4 x double> ret <4 x double> %cvt } @@ -362,6 +660,43 @@ define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) { ret <4 x float> %ext } +define <4 x float> @sitofp_4vf32_i16(<8 x i16> %a) { +; SSE2-LABEL: sitofp_4vf32_i16: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_4vf32_i16: +; AVX: # BB#0: +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = sitofp <4 x i16> %shuf to <4 x float> + ret <4 x float> %cvt +} + +define <4 x float> @sitofp_4vf32_i8(<16 x i8> %a) { +; SSE2-LABEL: sitofp_4vf32_i8: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: sitofp_4vf32_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = sitofp <4 x i8> %shuf to <4 x float> + ret <4 x float> %cvt +} + define <8 x float> @sitofp_8vf32(<8 x i32> %a) { ; SSE2-LABEL: sitofp_8vf32: ; SSE2: # BB#0: @@ -398,27 +733,112 @@ define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) { ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; AVX-LABEL: sitofp_4vf32_4i64: -; AVX: # BB#0: -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: sitofp_4vf32_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: sitofp_4vf32_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 +; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq %cvt = sitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt } +define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) { +; SSE2-LABEL: sitofp_8vf32_i16: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX1-LABEL: sitofp_8vf32_i16: +; AVX1: # BB#0: +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sitofp_8vf32_i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX2-NEXT: retq + %cvt = sitofp <8 x i16> %a to <8 x float> + ret <8 x float> %cvt +} + +define <8 x float> @sitofp_8vf32_i8(<16 x i8> %a) { +; SSE2-LABEL: sitofp_8vf32_i8: +; SSE2: # BB#0: +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX1-LABEL: sitofp_8vf32_i8: +; AVX1: # BB#0: +; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sitofp_8vf32_i8: +; AVX2: # BB#0: +; AVX2-NEXT: vpmovzxbd %xmm0, %ymm0 +; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 +; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 +; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %cvt = sitofp <8 x i8> %shuf to <8 x float> + ret <8 x float> %cvt +} + ; ; Unsigned Integer to Float ; @@ -428,21 +848,33 @@ define <4 x float> @uitofp_4vf32(<4 x i32> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movdqa {{.*#+}} 
xmm1 = [65535,65535,65535,65535] ; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: por .LCPI12_1(%rip), %xmm1 +; SSE2-NEXT: por .LCPI24_1(%rip), %xmm1 ; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: por .LCPI12_2(%rip), %xmm0 -; SSE2-NEXT: addps .LCPI12_3(%rip), %xmm0 +; SSE2-NEXT: por .LCPI24_2(%rip), %xmm0 +; SSE2-NEXT: addps .LCPI24_3(%rip), %xmm0 ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; AVX-LABEL: uitofp_4vf32: -; AVX: # BB#0: -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; AVX-NEXT: vaddps .LCPI12_2(%rip), %xmm0, %xmm0 -; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: uitofp_4vf32: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-NEXT: vaddps .LCPI24_2(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_4vf32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd .LCPI24_0(%rip), %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd .LCPI24_1(%rip), %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; AVX2-NEXT: vbroadcastss .LCPI24_2(%rip), %xmm2 +; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %cvt } @@ -455,30 +887,30 @@ define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) { ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB13_1 +; SSE2-NEXT: js .LBB25_1 ; SSE2-NEXT: # BB#2: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE2-NEXT: jmp .LBB13_3 -; SSE2-NEXT: .LBB13_1: +; SSE2-NEXT: jmp .LBB25_3 +; SSE2-NEXT: .LBB25_1: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0 ; SSE2-NEXT: addss %xmm0, %xmm0 -; SSE2-NEXT: .LBB13_3: +; SSE2-NEXT: .LBB25_3: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movd %xmm1, %rax ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB13_4 +; SSE2-NEXT: js .LBB25_4 ; SSE2-NEXT: # BB#5: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq -; SSE2-NEXT: .LBB13_4: +; SSE2-NEXT: .LBB25_4: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: xorps %xmm1, %xmm1 @@ -493,39 +925,39 @@ define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) { ; AVX-NEXT: movl %eax, %ecx ; AVX-NEXT: andl $1, %ecx ; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB13_1 +; AVX-NEXT: js .LBB25_1 ; AVX-NEXT: # BB#2: ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 -; AVX-NEXT: jmp .LBB13_3 -; AVX-NEXT: .LBB13_1: +; AVX-NEXT: jmp .LBB25_3 +; AVX-NEXT: .LBB25_1: ; AVX-NEXT: shrq %rax ; AVX-NEXT: orq %rax, %rcx ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX-NEXT: .LBB13_3: +; AVX-NEXT: .LBB25_3: ; AVX-NEXT: vmovq %xmm0, %rax ; AVX-NEXT: movl %eax, %ecx ; AVX-NEXT: andl $1, %ecx ; AVX-NEXT: 
testq %rax, %rax -; AVX-NEXT: js .LBB13_4 +; AVX-NEXT: js .LBB25_4 ; AVX-NEXT: # BB#5: ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 -; AVX-NEXT: jmp .LBB13_6 -; AVX-NEXT: .LBB13_4: +; AVX-NEXT: jmp .LBB25_6 +; AVX-NEXT: .LBB25_4: ; AVX-NEXT: shrq %rax ; AVX-NEXT: orq %rax, %rcx ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: .LBB13_6: +; AVX-NEXT: .LBB25_6: ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB13_8 +; AVX-NEXT: js .LBB25_8 ; AVX-NEXT: # BB#7: ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 -; AVX-NEXT: .LBB13_8: +; AVX-NEXT: .LBB25_8: ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; AVX-NEXT: retq @@ -534,6 +966,43 @@ define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) { ret <4 x float> %ext } +define <4 x float> @uitofp_4vf32_i16(<8 x i16> %a) { +; SSE2-LABEL: uitofp_4vf32_i16: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: uitofp_4vf32_i16: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq + %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = uitofp <4 x i16> %shuf to <4 x float> + ret <4 x float> %cvt +} + +define <4 x float> @uitofp_4vf32_i8(<16 x i8> %a) { +; SSE2-LABEL: uitofp_4vf32_i8: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: uitofp_4vf32_i8: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %cvt = uitofp <4 x i8> %shuf to <4 x float> + ret <4 x float> %cvt +} + define <8 x float> @uitofp_8vf32(<8 x i32> %a) { ; SSE2-LABEL: uitofp_8vf32: ; SSE2: # BB#0: @@ -556,18 +1025,30 @@ define <8 x float> @uitofp_8vf32(<8 x i32> %a) { ; SSE2-NEXT: addps %xmm2, %xmm1 ; SSE2-NEXT: retq ; -; AVX-LABEL: uitofp_8vf32: -; AVX: # BB#0: -; AVX-NEXT: vandps .LCPI14_0(%rip), %ymm0, %ymm1 -; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1 -; AVX-NEXT: vpsrld $16, %xmm0, %xmm2 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 -; AVX-NEXT: vmulps .LCPI14_1(%rip), %ymm0, %ymm0 -; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: uitofp_8vf32: +; AVX1: # BB#0: +; AVX1-NEXT: vandps .LCPI28_0(%rip), %ymm0, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vmulps 
.LCPI28_1(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_8vf32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd .LCPI28_0(%rip), %ymm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd .LCPI28_1(%rip), %ymm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vbroadcastss .LCPI28_2(%rip), %ymm2 +; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq %cvt = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %cvt } @@ -579,136 +1060,321 @@ define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) { ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB15_1 +; SSE2-NEXT: js .LBB29_1 ; SSE2-NEXT: # BB#2: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE2-NEXT: jmp .LBB15_3 -; SSE2-NEXT: .LBB15_1: +; SSE2-NEXT: jmp .LBB29_3 +; SSE2-NEXT: .LBB29_1: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm3 ; SSE2-NEXT: addss %xmm3, %xmm3 -; SSE2-NEXT: .LBB15_3: +; SSE2-NEXT: .LBB29_3: ; SSE2-NEXT: movd %xmm0, %rax ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB15_4 +; SSE2-NEXT: js .LBB29_4 ; SSE2-NEXT: # BB#5: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE2-NEXT: jmp .LBB15_6 -; SSE2-NEXT: .LBB15_4: +; SSE2-NEXT: jmp .LBB29_6 +; SSE2-NEXT: .LBB29_4: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm2 ; SSE2-NEXT: addss %xmm2, %xmm2 -; SSE2-NEXT: .LBB15_6: +; SSE2-NEXT: .LBB29_6: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movd %xmm1, %rax ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB15_7 +; SSE2-NEXT: js .LBB29_7 ; SSE2-NEXT: # BB#8: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE2-NEXT: jmp .LBB15_9 -; SSE2-NEXT: .LBB15_7: +; SSE2-NEXT: jmp .LBB29_9 +; SSE2-NEXT: .LBB29_7: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1 ; SSE2-NEXT: addss %xmm1, %xmm1 -; SSE2-NEXT: .LBB15_9: +; SSE2-NEXT: .LBB29_9: ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movd %xmm0, %rax ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB15_10 +; SSE2-NEXT: js .LBB29_10 ; SSE2-NEXT: # BB#11: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE2-NEXT: jmp .LBB15_12 -; SSE2-NEXT: .LBB15_10: +; SSE2-NEXT: jmp .LBB29_12 +; SSE2-NEXT: .LBB29_10: ; SSE2-NEXT: shrq %rax ; SSE2-NEXT: orq %rax, %rcx ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0 ; SSE2-NEXT: addss %xmm0, %xmm0 -; SSE2-NEXT: .LBB15_12: +; SSE2-NEXT: .LBB29_12: ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; AVX-LABEL: uitofp_4vf32_4i64: -; AVX: # BB#0: -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andl $1, %ecx -; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB15_1 
-; AVX-NEXT: # BB#2: -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 -; AVX-NEXT: jmp .LBB15_3 -; AVX-NEXT: .LBB15_1: -; AVX-NEXT: shrq %rax -; AVX-NEXT: orq %rax, %rcx -; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 -; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX-NEXT: .LBB15_3: -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andl $1, %ecx -; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB15_4 -; AVX-NEXT: # BB#5: -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 -; AVX-NEXT: jmp .LBB15_6 -; AVX-NEXT: .LBB15_4: -; AVX-NEXT: shrq %rax -; AVX-NEXT: orq %rax, %rcx -; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 -; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX-NEXT: .LBB15_6: -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andl $1, %ecx -; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB15_7 -; AVX-NEXT: # BB#8: -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 -; AVX-NEXT: jmp .LBB15_9 -; AVX-NEXT: .LBB15_7: -; AVX-NEXT: shrq %rax -; AVX-NEXT: orq %rax, %rcx -; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 -; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX-NEXT: .LBB15_9: -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andl $1, %ecx -; AVX-NEXT: testq %rax, %rax -; AVX-NEXT: js .LBB15_10 -; AVX-NEXT: # BB#11: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq -; AVX-NEXT: .LBB15_10: -; AVX-NEXT: shrq %rax -; AVX-NEXT: orq %rax, %rcx -; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 -; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: uitofp_4vf32_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: js .LBB29_1 +; AVX1-NEXT: # BB#2: +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 +; AVX1-NEXT: jmp .LBB29_3 +; AVX1-NEXT: .LBB29_1: +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 +; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: .LBB29_3: +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: js .LBB29_4 +; AVX1-NEXT: # BB#5: +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX1-NEXT: jmp .LBB29_6 +; AVX1-NEXT: .LBB29_4: +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 +; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: .LBB29_6: +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: js .LBB29_7 +; AVX1-NEXT: # BB#8: +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX1-NEXT: jmp .LBB29_9 +; AVX1-NEXT: .LBB29_7: +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 +; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: .LBB29_9: +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: js .LBB29_10 +; 
AVX1-NEXT: # BB#11: +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB29_10: +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 +; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_4vf32_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: testq %rax, %rax +; AVX2-NEXT: js .LBB29_1 +; AVX2-NEXT: # BB#2: +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 +; AVX2-NEXT: jmp .LBB29_3 +; AVX2-NEXT: .LBB29_1: +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 +; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: .LBB29_3: +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: testq %rax, %rax +; AVX2-NEXT: js .LBB29_4 +; AVX2-NEXT: # BB#5: +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX2-NEXT: jmp .LBB29_6 +; AVX2-NEXT: .LBB29_4: +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 +; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: .LBB29_6: +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: testq %rax, %rax +; AVX2-NEXT: js .LBB29_7 +; AVX2-NEXT: # BB#8: +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 +; AVX2-NEXT: jmp .LBB29_9 +; AVX2-NEXT: .LBB29_7: +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 +; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: .LBB29_9: +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: testq %rax, %rax +; AVX2-NEXT: js .LBB29_10 +; AVX2-NEXT: # BB#11: +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 +; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB29_10: +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 +; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq %cvt = uitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt } + +define <8 x float> @uitofp_8vf32_i16(<8 x i16> %a) { +; SSE2-LABEL: uitofp_8vf32_i16: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE2-NEXT: pand .LCPI30_0(%rip), %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX1-LABEL: uitofp_8vf32_i16: +; AVX1: # BB#0: +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: 
vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_8vf32_i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX2-NEXT: retq + %cvt = uitofp <8 x i16> %a to <8 x float> + ret <8 x float> %cvt +} + +define <8 x float> @uitofp_8vf32_i8(<16 x i8> %a) { +; SSE2-LABEL: uitofp_8vf32_i8: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE2-NEXT: pand .LCPI31_0(%rip), %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX1-LABEL: uitofp_8vf32_i8: +; AVX1: # BB#0: +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps .LCPI31_0(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_8vf32_i8: +; AVX2: # BB#0: +; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX2-NEXT: vpbroadcastd .LCPI31_0(%rip), %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %cvt = uitofp <8 x i8> %shuf to <8 x float> + ret <8 x float> %cvt +} + +; +; Aggregates +; + +%Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }> +define void @aggregate_sitofp_8f32_i16(%Arguments* nocapture readonly %a0) { +; SSE2-LABEL: aggregate_sitofp_8f32_i16: +; SSE2: # BB#0: +; SSE2-NEXT: movq 24(%rdi), %rax +; SSE2-NEXT: movdqu 8(%rdi), %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: movaps %xmm0, (%rax) +; SSE2-NEXT: movaps %xmm1, 16(%rax) +; SSE2-NEXT: retq +; +; AVX1-LABEL: aggregate_sitofp_8f32_i16: +; AVX1: # BB#0: +; AVX1-NEXT: movq 24(%rdi), %rax +; AVX1-NEXT: vmovdqu 8(%rdi), %xmm0 +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vmovaps %ymm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: aggregate_sitofp_8f32_i16: +; AVX2: # BB#0: +; AVX2-NEXT: movq 24(%rdi), %rax +; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0 +; AVX2-NEXT: vcvtdq2ps 
%ymm0, %ymm0 +; AVX2-NEXT: vmovaps %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %1 = load %Arguments, %Arguments* %a0, align 1 + %2 = extractvalue %Arguments %1, 1 + %3 = extractvalue %Arguments %1, 2 + %4 = sitofp <8 x i16> %2 to <8 x float> + store <8 x float> %4, <8 x float>* %3, align 32 + ret void +} diff --git a/test/CodeGen/X86/vec_shift8.ll b/test/CodeGen/X86/vec_shift8.ll index a32cb30b0b26..9d19f667ea9b 100644 --- a/test/CodeGen/X86/vec_shift8.ll +++ b/test/CodeGen/X86/vec_shift8.ll @@ -8,114 +8,83 @@ define <2 x i64> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp { entry: -; SSE2: pextrw $7, %xmm0, %eax -; SSE2-NEXT: pextrw $7, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pextrw $3, %xmm0, %eax -; SSE2-NEXT: pextrw $3, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: pextrw $5, %xmm0, %eax -; SSE2-NEXT: pextrw $5, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: pextrw $1, %xmm0, %eax -; SSE2-NEXT: pextrw $1, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; SSE2-NEXT: pextrw $6, %xmm0, %eax -; SSE2-NEXT: pextrw $6, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: pextrw $4, %xmm0, %eax -; SSE2-NEXT: pextrw $4, %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: shll %cl, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; ALL-NOT: shll +; +; SSE2: psllw $12, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psllw $8, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psllw $4, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psllw $2, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: psllw $1, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: pextrw $1, %xmm0, %eax -; SSE41-NEXT: pextrw $1, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: movd 
%xmm0, %edx -; SSE41-NEXT: movd %xmm1, %ecx -; SSE41-NEXT: shll %cl, %edx -; SSE41-NEXT: movd %edx, %xmm2 -; SSE41-NEXT: pinsrw $1, %eax, %xmm2 -; SSE41-NEXT: pextrw $2, %xmm0, %eax -; SSE41-NEXT: pextrw $2, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $2, %eax, %xmm2 -; SSE41-NEXT: pextrw $3, %xmm0, %eax -; SSE41-NEXT: pextrw $3, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $3, %eax, %xmm2 -; SSE41-NEXT: pextrw $4, %xmm0, %eax -; SSE41-NEXT: pextrw $4, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $4, %eax, %xmm2 -; SSE41-NEXT: pextrw $5, %xmm0, %eax -; SSE41-NEXT: pextrw $5, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $5, %eax, %xmm2 -; SSE41-NEXT: pextrw $6, %xmm0, %eax -; SSE41-NEXT: pextrw $6, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $6, %eax, %xmm2 -; SSE41-NEXT: pextrw $7, %xmm0, %eax -; SSE41-NEXT: pextrw $7, %xmm1, %ecx -; SSE41-NEXT: shll %cl, %eax -; SSE41-NEXT: pinsrw $7, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psllw $12, %xmm0 +; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psllw $8, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm4, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpextrw $1, %xmm0, %eax -; AVX-NEXT: vpextrw $1, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vmovd %xmm0, %edx -; AVX-NEXT: vmovd %xmm1, %ecx -; AVX-NEXT: shll %cl, %edx -; AVX-NEXT: vmovd %edx, %xmm2 -; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $2, %xmm0, %eax -; AVX-NEXT: vpextrw $2, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $3, %xmm0, %eax -; AVX-NEXT: vpextrw $3, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $4, %xmm0, %eax -; AVX-NEXT: vpextrw $4, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $5, %xmm0, %eax -; AVX-NEXT: vpextrw $5, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $6, %xmm0, %eax -; AVX-NEXT: vpextrw $6, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $7, %xmm0, %eax -; AVX-NEXT: vpextrw $7, %xmm1, %ecx -; AVX-NEXT: shll %cl, %eax -; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX: vpsllw $12, %xmm1, %xmm2 +; AVX-NEXT: vpsllw $4, %xmm1, %xmm1 +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpsllw $8, %xmm0, %xmm3 +; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $2, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsllw 
$1, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %shl = shl <8 x i16> %r, %a %tmp2 = bitcast <8 x i16> %shl to <2 x i64> @@ -124,88 +93,66 @@ entry: define <2 x i64> @shl_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { entry: -; SSE2: psllw $5, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pand %xmm1, %xmm3 -; SSE2-NEXT: pcmpeqb %xmm2, %xmm3 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: pandn %xmm0, %xmm4 -; SSE2-NEXT: psllw $4, %xmm0 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm1 -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pand %xmm1, %xmm3 -; SSE2-NEXT: pcmpeqb %xmm2, %xmm3 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: pandn %xmm0, %xmm4 -; SSE2-NEXT: psllw $2, %xmm0 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm1 -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqb %xmm2, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm0, %xmm2 -; SSE2-NEXT: paddb %xmm0, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2: psllw $5, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm4 +; SSE2-NEXT: psllw $4, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm4 +; SSE2-NEXT: psllw $2, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: paddb %xmm0, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psllw $5, %xmm1 -; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm5 -; SSE41-NEXT: paddb %xmm5, %xmm5 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm5, %xmm4 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm1 -; SSE41-NEXT: movdqa %xmm2, %xmm6 -; SSE41-NEXT: psllw $4, %xmm6 -; SSE41-NEXT: pand {{.*}}(%rip), %xmm6 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm6, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 -; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddb %xmm1, %xmm1 -; SSE41-NEXT: paddb %xmm5, %xmm5 -; SSE41-NEXT: pand %xmm3, %xmm5 -; SSE41-NEXT: pcmpeqb %xmm5, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: psllw $5, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: psllw $4, %xmm3 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb 
%xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: psllw $2, %xmm3 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: paddb %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 +; SSE41-NEXT: paddb %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpsllw $5, %xmm1, %xmm1 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX-NEXT: vpand %xmm2, %xmm3, %xmm4 -; AVX-NEXT: vpcmpeqb %xmm3, %xmm4, %xmm4 -; AVX-NEXT: vpand %xmm1, %xmm3, %xmm1 -; AVX-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1 -; AVX-NEXT: vpsllw $4, %xmm0, %xmm5 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm5, %xmm5 -; AVX-NEXT: vpblendvb %xmm1, %xmm5, %xmm0, %xmm0 -; AVX-NEXT: vpsllw $2, %xmm0, %xmm1 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX: vpsllw $5, %xmm1, %xmm1 +; AVX-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $2, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq %shl = shl <16 x i8> %r, %a %tmp2 = bitcast <16 x i8> %shl to <2 x i64> @@ -214,114 +161,83 @@ entry: define <2 x i64> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp { entry: -; SSE2: pextrw $7, %xmm1, %ecx -; SSE2-NEXT: pextrw $7, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pextrw $3, %xmm1, %ecx -; SSE2-NEXT: pextrw $3, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: pextrw $5, %xmm1, %ecx -; SSE2-NEXT: pextrw $5, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: pextrw $1, %xmm1, %ecx -; SSE2-NEXT: pextrw $1, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; SSE2-NEXT: pextrw $6, %xmm1, %ecx -; SSE2-NEXT: pextrw $6, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: pextrw $4, %xmm1, %ecx -; SSE2-NEXT: pextrw $4, %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: sarw %cl, %ax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; ALL-NOT: sarw +; +; SSE2: psllw $12, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psraw $8, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psraw $4, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psraw $2, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: psraw $1, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: pextrw $1, %xmm1, %ecx -; SSE41-NEXT: pextrw $1, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: movd %xmm1, %ecx -; SSE41-NEXT: movd %xmm0, %edx -; SSE41-NEXT: sarw %cl, %dx -; SSE41-NEXT: movd %edx, %xmm2 -; SSE41-NEXT: pinsrw $1, %eax, %xmm2 -; SSE41-NEXT: pextrw $2, %xmm1, %ecx -; SSE41-NEXT: pextrw $2, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $2, %eax, %xmm2 -; SSE41-NEXT: pextrw $3, %xmm1, %ecx -; SSE41-NEXT: pextrw $3, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $3, %eax, %xmm2 -; SSE41-NEXT: pextrw $4, %xmm1, %ecx -; SSE41-NEXT: pextrw $4, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $4, %eax, %xmm2 -; SSE41-NEXT: pextrw $5, %xmm1, %ecx -; SSE41-NEXT: pextrw $5, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $5, %eax, %xmm2 -; SSE41-NEXT: pextrw $6, %xmm1, %ecx -; SSE41-NEXT: pextrw $6, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $6, %eax, %xmm2 -; SSE41-NEXT: pextrw $7, %xmm1, %ecx -; SSE41-NEXT: pextrw $7, %xmm0, %eax -; SSE41-NEXT: sarw %cl, %ax -; SSE41-NEXT: pinsrw $7, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psllw $12, %xmm0 +; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psraw $8, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm4, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psraw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psraw $2, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psraw $1, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpextrw $1, %xmm1, %ecx -; AVX-NEXT: vpextrw $1, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vmovd %xmm1, %ecx -; AVX-NEXT: vmovd %xmm0, %edx -; AVX-NEXT: sarw %cl, %dx -; 
AVX-NEXT: vmovd %edx, %xmm2 -; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $2, %xmm1, %ecx -; AVX-NEXT: vpextrw $2, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $3, %xmm1, %ecx -; AVX-NEXT: vpextrw $3, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $4, %xmm1, %ecx -; AVX-NEXT: vpextrw $4, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $5, %xmm1, %ecx -; AVX-NEXT: vpextrw $5, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $6, %xmm1, %ecx -; AVX-NEXT: vpextrw $6, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $7, %xmm1, %ecx -; AVX-NEXT: vpextrw $7, %xmm0, %eax -; AVX-NEXT: sarw %cl, %ax -; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX: vpsllw $12, %xmm1, %xmm2 +; AVX-NEXT: vpsllw $4, %xmm1, %xmm1 +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpsraw $8, %xmm0, %xmm3 +; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $4, %xmm0, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $2, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $1, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %ashr = ashr <8 x i16> %r, %a %tmp2 = bitcast <8 x i16> %ashr to <2 x i64> @@ -330,282 +246,122 @@ entry: define <2 x i64> @ashr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { entry: +; ALL-NOT: sarb ; -; SSE2: pushq %rbp -; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: movaps %xmm1, -24(%rsp) -; SSE2-NEXT: movaps %xmm0, -40(%rsp) -; SSE2-NEXT: movb -9(%rsp), %cl -; SSE2-NEXT: movb -25(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movb -17(%rsp), %cl -; SSE2-NEXT: movb -33(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -13(%rsp), %cl -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movl %eax, -44(%rsp) -; SSE2-NEXT: movb -29(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movzbl %al, %r9d -; SSE2-NEXT: movb -21(%rsp), %cl -; SSE2-NEXT: movb -37(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -11(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r10d -; SSE2-NEXT: movb -27(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -19(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r11d -; SSE2-NEXT: movb -35(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -15(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r14d -; SSE2-NEXT: movb -31(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movzbl %al, %r15d -; SSE2-NEXT: movb -23(%rsp), %cl -; SSE2-NEXT: movb -39(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -10(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r12d -; SSE2-NEXT: movb -26(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -18(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r13d -; SSE2-NEXT: movb -34(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -14(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r8d -; SSE2-NEXT: movb -30(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb -22(%rsp), %cl -; SSE2-NEXT: movzbl %al, %ebp -; SSE2-NEXT: movb -38(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movb 
-12(%rsp), %cl -; SSE2-NEXT: movzbl %al, %edi -; SSE2-NEXT: movb -28(%rsp), %dl -; SSE2-NEXT: sarb %cl, %dl -; SSE2-NEXT: movb -20(%rsp), %cl -; SSE2-NEXT: movzbl %dl, %esi -; SSE2-NEXT: movb -36(%rsp), %bl -; SSE2-NEXT: sarb %cl, %bl -; SSE2-NEXT: movb -16(%rsp), %cl -; SSE2-NEXT: movzbl %bl, %ebx -; SSE2-NEXT: movb -32(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movzbl %al, %edx -; SSE2-NEXT: movb -24(%rsp), %cl -; SSE2-NEXT: movb -40(%rsp), %al -; SSE2-NEXT: sarb %cl, %al -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movd -44(%rsp), %xmm1 -; SSE2: movd %r9d, %xmm2 -; SSE2-NEXT: movd %r10d, %xmm3 -; SSE2-NEXT: movd %r11d, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %r14d, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: movd %r15d, %xmm1 -; SSE2-NEXT: movd %r12d, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-NEXT: movd %r13d, %xmm0 -; SSE2-NEXT: movd %r8d, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %ebp, %xmm0 -; SSE2-NEXT: movd %edi, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: movd %esi, %xmm0 -; SSE2-NEXT: movd %ebx, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %edx, %xmm4 -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: popq %rbx -; SSE2-NEXT: popq 
%r12 -; SSE2-NEXT: popq %r13 -; SSE2-NEXT: popq %r14 -; SSE2-NEXT: popq %r15 -; SSE2-NEXT: popq %rbp +; SSE2: punpckhbw {{.*#}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE2-NEXT: psllw $5, %xmm1 +; SSE2-NEXT: punpckhbw {{.*#}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pcmpgtw %xmm4, %xmm5 +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pandn %xmm2, %xmm6 +; SSE2-NEXT: psraw $4, %xmm2 +; SSE2-NEXT: pand %xmm5, %xmm2 +; SSE2-NEXT: por %xmm6, %xmm2 +; SSE2-NEXT: paddw %xmm4, %xmm4 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pcmpgtw %xmm4, %xmm5 +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pandn %xmm2, %xmm6 +; SSE2-NEXT: psraw $2, %xmm2 +; SSE2-NEXT: pand %xmm5, %xmm2 +; SSE2-NEXT: por %xmm6, %xmm2 +; SSE2-NEXT: paddw %xmm4, %xmm4 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pcmpgtw %xmm4, %xmm5 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pandn %xmm2, %xmm4 +; SSE2-NEXT: psraw $1, %xmm2 +; SSE2-NEXT: pand %xmm5, %xmm2 +; SSE2-NEXT: por %xmm4, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pxor %xmm4, %xmm4 +; SSE2-NEXT: pcmpgtw %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm5 +; SSE2-NEXT: pandn %xmm0, %xmm5 +; SSE2-NEXT: psraw $4, %xmm0 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm4 +; SSE2-NEXT: pcmpgtw %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm5 +; SSE2-NEXT: pandn %xmm0, %xmm5 +; SSE2-NEXT: psraw $2, %xmm0 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtw %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: psraw $1, %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: pextrb $1, %xmm1, %ecx -; SSE41-NEXT: pextrb $1, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pextrb $0, %xmm1, %ecx -; SSE41-NEXT: pextrb $0, %xmm0, %edx -; SSE41-NEXT: sarb %cl, %dl -; SSE41-NEXT: movzbl %dl, %ecx -; SSE41-NEXT: movd %ecx, %xmm2 -; SSE41-NEXT: pinsrb $1, %eax, %xmm2 -; SSE41-NEXT: pextrb $2, %xmm1, %ecx -; SSE41-NEXT: pextrb $2, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $2, %eax, %xmm2 -; SSE41-NEXT: pextrb $3, %xmm1, %ecx -; SSE41-NEXT: pextrb $3, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $3, %eax, %xmm2 -; SSE41-NEXT: pextrb $4, %xmm1, %ecx -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $4, %eax, %xmm2 -; SSE41-NEXT: pextrb $5, %xmm1, %ecx -; SSE41-NEXT: pextrb $5, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $5, %eax, %xmm2 -; SSE41-NEXT: pextrb $6, %xmm1, %ecx -; SSE41-NEXT: pextrb $6, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $6, %eax, %xmm2 -; SSE41-NEXT: pextrb $7, %xmm1, %ecx -; SSE41-NEXT: pextrb $7, %xmm0, %eax -; SSE41-NEXT: 
sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $7, %eax, %xmm2 -; SSE41-NEXT: pextrb $8, %xmm1, %ecx -; SSE41-NEXT: pextrb $8, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $8, %eax, %xmm2 -; SSE41-NEXT: pextrb $9, %xmm1, %ecx -; SSE41-NEXT: pextrb $9, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $9, %eax, %xmm2 -; SSE41-NEXT: pextrb $10, %xmm1, %ecx -; SSE41-NEXT: pextrb $10, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $10, %eax, %xmm2 -; SSE41-NEXT: pextrb $11, %xmm1, %ecx -; SSE41-NEXT: pextrb $11, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $11, %eax, %xmm2 -; SSE41-NEXT: pextrb $12, %xmm1, %ecx -; SSE41-NEXT: pextrb $12, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $12, %eax, %xmm2 -; SSE41-NEXT: pextrb $13, %xmm1, %ecx -; SSE41-NEXT: pextrb $13, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $13, %eax, %xmm2 -; SSE41-NEXT: pextrb $14, %xmm1, %ecx -; SSE41-NEXT: pextrb $14, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $14, %eax, %xmm2 -; SSE41-NEXT: pextrb $15, %xmm1, %ecx -; SSE41-NEXT: pextrb $15, %xmm0, %eax -; SSE41-NEXT: sarb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $15, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: psllw $5, %xmm1 +; SSE41-NEXT: punpckhbw {{.*#}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; SSE41-NEXT: punpckhbw {{.*#}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psraw $4, %xmm4 +; SSE41-NEXT: pblendvb %xmm4, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psraw $2, %xmm4 +; SSE41-NEXT: paddw %xmm0, %xmm0 +; SSE41-NEXT: pblendvb %xmm4, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psraw $1, %xmm4 +; SSE41-NEXT: paddw %xmm0, %xmm0 +; SSE41-NEXT: pblendvb %xmm4, %xmm3 +; SSE41-NEXT: psrlw $8, %xmm3 +; SSE41-NEXT: punpcklbw {{.*#}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE41-NEXT: punpcklbw {{.*#}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $4, %xmm2 +; SSE41-NEXT: pblendvb %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $2, %xmm2 +; SSE41-NEXT: paddw %xmm0, %xmm0 +; SSE41-NEXT: pblendvb %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $1, %xmm2 +; SSE41-NEXT: paddw %xmm0, %xmm0 +; SSE41-NEXT: pblendvb %xmm2, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpextrb $1, %xmm1, %ecx -; AVX-NEXT: vpextrb $1, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm1, %ecx -; AVX-NEXT: vpextrb $0, %xmm0, %edx -; AVX-NEXT: sarb %cl, %dl -; AVX-NEXT: movzbl %dl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: 
vpextrb $2, %xmm1, %ecx -; AVX-NEXT: vpextrb $2, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %ecx -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %ecx -; AVX-NEXT: vpextrb $4, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %ecx -; AVX-NEXT: vpextrb $5, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %ecx -; AVX-NEXT: vpextrb $6, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %ecx -; AVX-NEXT: vpextrb $7, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %ecx -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %ecx -; AVX-NEXT: vpextrb $9, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %ecx -; AVX-NEXT: vpextrb $10, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %ecx -; AVX-NEXT: vpextrb $11, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %ecx -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %ecx -; AVX-NEXT: vpextrb $13, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %ecx -; AVX-NEXT: vpextrb $14, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %ecx -; AVX-NEXT: vpextrb $15, %xmm0, %eax -; AVX-NEXT: sarb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX: vpsllw $5, %xmm1, %xmm1 +; AVX-NEXT: vpunpckhbw {{.*#}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; AVX-NEXT: vpunpckhbw {{.*#}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; AVX-NEXT: vpsraw $4, %xmm3, %xmm4 +; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 +; AVX-NEXT: vpsraw $2, %xmm3, %xmm4 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 +; AVX-NEXT: vpsraw $1, %xmm3, %xmm4 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 +; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX-NEXT: vpunpcklbw {{.*#}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX-NEXT: vpunpcklbw {{.*#}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: vpsraw $4, %xmm0, %xmm3 +; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $2, %xmm0, %xmm3 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1 +; AVX-NEXT: 
vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $1, %xmm0, %xmm3 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq %ashr = ashr <16 x i8> %r, %a %tmp2 = bitcast <16 x i8> %ashr to <2 x i64> @@ -614,118 +370,83 @@ entry: define <2 x i64> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp { entry: - -; SSE2: pextrw $7, %xmm0, %eax -; SSE2-NEXT: pextrw $7, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pextrw $3, %xmm0, %eax -; SSE2-NEXT: pextrw $3, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: pextrw $5, %xmm0, %eax -; SSE2-NEXT: pextrw $5, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: pextrw $1, %xmm0, %eax -; SSE2-NEXT: pextrw $1, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; SSE2-NEXT: pextrw $6, %xmm0, %eax -; SSE2-NEXT: pextrw $6, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: pextrw $4, %xmm0, %eax -; SSE2-NEXT: pextrw $4, %xmm1, %ecx -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movzwl %ax, %eax -; SSE2-NEXT: shrl %cl, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; ALL-NOT: shrl +; +; SSE2: psllw $12, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psrlw $4, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psraw $15, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: psrlw $2, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: paddw %xmm1, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: psrlw $1, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: pextrw $1, %xmm0, %eax -; SSE41-NEXT: pextrw $1, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: movd %xmm1, %ecx -; SSE41-NEXT: movd %xmm0, %edx -; SSE41-NEXT: movzwl %dx, %edx -; SSE41-NEXT: shrl %cl, %edx -; SSE41-NEXT: movd %edx, 
%xmm2 -; SSE41-NEXT: pinsrw $1, %eax, %xmm2 -; SSE41-NEXT: pextrw $2, %xmm0, %eax -; SSE41-NEXT: pextrw $2, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $2, %eax, %xmm2 -; SSE41-NEXT: pextrw $3, %xmm0, %eax -; SSE41-NEXT: pextrw $3, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $3, %eax, %xmm2 -; SSE41-NEXT: pextrw $4, %xmm0, %eax -; SSE41-NEXT: pextrw $4, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $4, %eax, %xmm2 -; SSE41-NEXT: pextrw $5, %xmm0, %eax -; SSE41-NEXT: pextrw $5, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $5, %eax, %xmm2 -; SSE41-NEXT: pextrw $6, %xmm0, %eax -; SSE41-NEXT: pextrw $6, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $6, %eax, %xmm2 -; SSE41-NEXT: pextrw $7, %xmm0, %eax -; SSE41-NEXT: pextrw $7, %xmm1, %ecx -; SSE41-NEXT: shrl %cl, %eax -; SSE41-NEXT: pinsrw $7, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psllw $12, %xmm0 +; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psrlw $8, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm4, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psrlw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psrlw $2, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpextrw $1, %xmm0, %eax -; AVX-NEXT: vpextrw $1, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vmovd %xmm1, %ecx -; AVX-NEXT: vmovd %xmm0, %edx -; AVX-NEXT: movzwl %dx, %edx -; AVX-NEXT: shrl %cl, %edx -; AVX-NEXT: vmovd %edx, %xmm2 -; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $2, %xmm0, %eax -; AVX-NEXT: vpextrw $2, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $3, %xmm0, %eax -; AVX-NEXT: vpextrw $3, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $4, %xmm0, %eax -; AVX-NEXT: vpextrw $4, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $5, %xmm0, %eax -; AVX-NEXT: vpextrw $5, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $6, %xmm0, %eax -; AVX-NEXT: vpextrw $6, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrw $7, %xmm0, %eax -; AVX-NEXT: vpextrw $7, %xmm1, %ecx -; AVX-NEXT: shrl %cl, %eax -; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX: vpsllw $12, %xmm1, %xmm2 +; AVX-NEXT: vpsllw $4, %xmm1, %xmm1 +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm3 +; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $2, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: 
vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %lshr = lshr <8 x i16> %r, %a %tmp2 = bitcast <8 x i16> %lshr to <2 x i64> @@ -734,281 +455,71 @@ entry: define <2 x i64> @lshr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { entry: -; SSE2: pushq %rbp -; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: movaps %xmm1, -24(%rsp) -; SSE2-NEXT: movaps %xmm0, -40(%rsp) -; SSE2-NEXT: movb -9(%rsp), %cl -; SSE2-NEXT: movb -25(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movb -17(%rsp), %cl -; SSE2-NEXT: movb -33(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -13(%rsp), %cl -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movl %eax, -44(%rsp) -; SSE2-NEXT: movb -29(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movzbl %al, %r9d -; SSE2-NEXT: movb -21(%rsp), %cl -; SSE2-NEXT: movb -37(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -11(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r10d -; SSE2-NEXT: movb -27(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -19(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r11d -; SSE2-NEXT: movb -35(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -15(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r14d -; SSE2-NEXT: movb -31(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movzbl %al, %r15d -; SSE2-NEXT: movb -23(%rsp), %cl -; SSE2-NEXT: movb -39(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -10(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r12d -; SSE2-NEXT: movb -26(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -18(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r13d -; SSE2-NEXT: movb -34(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -14(%rsp), %cl -; SSE2-NEXT: movzbl %al, %r8d -; SSE2-NEXT: movb -30(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -22(%rsp), %cl -; SSE2-NEXT: movzbl %al, %ebp -; SSE2-NEXT: movb -38(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movb -12(%rsp), %cl -; SSE2-NEXT: movzbl %al, %edi -; SSE2-NEXT: movb -28(%rsp), %dl -; SSE2-NEXT: shrb %cl, %dl -; SSE2-NEXT: movb -20(%rsp), %cl -; SSE2-NEXT: movzbl %dl, %esi -; SSE2-NEXT: movb -36(%rsp), %bl -; SSE2-NEXT: shrb %cl, %bl -; SSE2-NEXT: movb -16(%rsp), %cl -; SSE2-NEXT: movzbl %bl, %ebx -; SSE2-NEXT: movb -32(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movzbl %al, %edx -; SSE2-NEXT: movb -24(%rsp), %cl -; SSE2-NEXT: movb -40(%rsp), %al -; SSE2-NEXT: shrb %cl, %al -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: movd -44(%rsp), %xmm1 -; SSE2: movd %r9d, %xmm2 -; SSE2-NEXT: movd %r10d, %xmm3 -; SSE2-NEXT: movd %r11d, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %r14d, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: movd %r15d, %xmm1 -; SSE2-NEXT: movd %r12d, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-NEXT: movd %r13d, %xmm0 -; SSE2-NEXT: movd %r8d, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %ebp, %xmm0 -; SSE2-NEXT: movd %edi, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: movd %esi, %xmm0 -; SSE2-NEXT: movd %ebx, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %edx, %xmm4 -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: popq %rbx -; SSE2-NEXT: popq %r12 -; SSE2-NEXT: popq %r13 -; SSE2-NEXT: popq %r14 -; SSE2-NEXT: popq %r15 -; SSE2-NEXT: popq %rbp +; ALL-NOT: shrb +; +; SSE2: psllw $5, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm4 +; SSE2-NEXT: psrlw $4, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm4 +; SSE2-NEXT: psrlw $2, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: psrlw $1, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE41: pextrb $1, %xmm1, %ecx -; SSE41-NEXT: pextrb $1, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pextrb $0, %xmm1, %ecx -; SSE41-NEXT: pextrb $0, %xmm0, %edx -; SSE41-NEXT: shrb %cl, %dl -; SSE41-NEXT: movzbl %dl, %ecx -; SSE41-NEXT: movd %ecx, %xmm2 -; SSE41-NEXT: pinsrb $1, %eax, %xmm2 -; SSE41-NEXT: pextrb $2, %xmm1, %ecx -; SSE41-NEXT: pextrb $2, %xmm0, %eax -; 
SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $2, %eax, %xmm2 -; SSE41-NEXT: pextrb $3, %xmm1, %ecx -; SSE41-NEXT: pextrb $3, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $3, %eax, %xmm2 -; SSE41-NEXT: pextrb $4, %xmm1, %ecx -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $4, %eax, %xmm2 -; SSE41-NEXT: pextrb $5, %xmm1, %ecx -; SSE41-NEXT: pextrb $5, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $5, %eax, %xmm2 -; SSE41-NEXT: pextrb $6, %xmm1, %ecx -; SSE41-NEXT: pextrb $6, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $6, %eax, %xmm2 -; SSE41-NEXT: pextrb $7, %xmm1, %ecx -; SSE41-NEXT: pextrb $7, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $7, %eax, %xmm2 -; SSE41-NEXT: pextrb $8, %xmm1, %ecx -; SSE41-NEXT: pextrb $8, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $8, %eax, %xmm2 -; SSE41-NEXT: pextrb $9, %xmm1, %ecx -; SSE41-NEXT: pextrb $9, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $9, %eax, %xmm2 -; SSE41-NEXT: pextrb $10, %xmm1, %ecx -; SSE41-NEXT: pextrb $10, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $10, %eax, %xmm2 -; SSE41-NEXT: pextrb $11, %xmm1, %ecx -; SSE41-NEXT: pextrb $11, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $11, %eax, %xmm2 -; SSE41-NEXT: pextrb $12, %xmm1, %ecx -; SSE41-NEXT: pextrb $12, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $12, %eax, %xmm2 -; SSE41-NEXT: pextrb $13, %xmm1, %ecx -; SSE41-NEXT: pextrb $13, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $13, %eax, %xmm2 -; SSE41-NEXT: pextrb $14, %xmm1, %ecx -; SSE41-NEXT: pextrb $14, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $14, %eax, %xmm2 -; SSE41-NEXT: pextrb $15, %xmm1, %ecx -; SSE41-NEXT: pextrb $15, %xmm0, %eax -; SSE41-NEXT: shrb %cl, %al -; SSE41-NEXT: movzbl %al, %eax -; SSE41-NEXT: pinsrb $15, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41: movdqa %xmm0, %xmm2 +; SSE41-NEXT: psllw $5, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: psrlw $4, %xmm3 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: psrlw $2, %xmm3 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: paddb %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: psrlw $1, %xmm3 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: paddb %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX: vpextrb $1, %xmm1, %ecx -; AVX-NEXT: vpextrb $1, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm1, %ecx -; AVX-NEXT: vpextrb $0, %xmm0, %edx -; AVX-NEXT: shrb %cl, %dl -; AVX-NEXT: movzbl %dl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm1, %ecx -; AVX-NEXT: vpextrb $2, %xmm0, %eax -; 
AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %ecx -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %ecx -; AVX-NEXT: vpextrb $4, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %ecx -; AVX-NEXT: vpextrb $5, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %ecx -; AVX-NEXT: vpextrb $6, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %ecx -; AVX-NEXT: vpextrb $7, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %ecx -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %ecx -; AVX-NEXT: vpextrb $9, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %ecx -; AVX-NEXT: vpextrb $10, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %ecx -; AVX-NEXT: vpextrb $11, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %ecx -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %ecx -; AVX-NEXT: vpextrb $13, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %ecx -; AVX-NEXT: vpextrb $14, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %ecx -; AVX-NEXT: vpextrb $15, %xmm0, %eax -; AVX-NEXT: shrb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX: vpsllw $5, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq %lshr = lshr <16 x i8> %r, %a %tmp2 = bitcast <16 x i8> %lshr to <2 x i64> diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll index 8dc76231856a..2c6c8a3e7ade 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -88,7 +88,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) { ; ALL-LABEL: shuffle_v8f64_01014545: ; ALL: # BB#0: -; ALL-NEXT: vpermpd $68, %zmm0, %zmm0 +; ALL-NEXT: vshuff64x2 $160, %zmm0, %zmm0, %zmm0 ; 
ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> ret <8 x double> %shuffle @@ -650,7 +650,7 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) { ; ALL-LABEL: shuffle_v8i64_01014545: ; ALL: # BB#0: -; ALL-NEXT: vpermq $68, %zmm0, %zmm0 +; ALL-NEXT: vshufi64x2 $160, %zmm0, %zmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> ret <8 x i64> %shuffle diff --git a/test/CodeGen/X86/win32-eh-states.ll b/test/CodeGen/X86/win32-eh-states.ll index 8db127df6da7..0aae8c4d0189 100644 --- a/test/CodeGen/X86/win32-eh-states.ll +++ b/test/CodeGen/X86/win32-eh-states.ll @@ -30,7 +30,7 @@ $"\01??_R0H@8" = comdat any @"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat @llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" -define void @f() #0 { +define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: invoke void @may_throw(i32 1) to label %invoke.cont unwind label %lpad @@ -46,14 +46,14 @@ try.cont.9: ; preds = %invoke.cont.3, %inv ret void lpad: ; preds = %catch, %entry - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %0 = landingpad { i8*, i32 } catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0 %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %catch.dispatch.4 lpad.1: ; preds = %invoke.cont - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) + %3 = landingpad { i8*, i32 } catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*) %4 = extractvalue { i8*, i32 } %3, 0 %5 = extractvalue { i8*, i32 } %3, 1 @@ -110,3 +110,5 @@ eh.resume: ; preds = %catch.dispatch.4 ; CHECK: movl $3, Lf$frame_escape_{{[0-9]+.*}} ; CHECK: movl $3, (%esp) ; CHECK: calll _may_throw + +; CHECK: .safeseh ___ehhandler$f diff --git a/test/CodeGen/X86/win32-eh.ll b/test/CodeGen/X86/win32-eh.ll index 42c9d9e2240d..f235d2884d03 100644 --- a/test/CodeGen/X86/win32-eh.ll +++ b/test/CodeGen/X86/win32-eh.ll @@ -6,16 +6,27 @@ declare i32 @_except_handler4(...) declare i32 @__CxxFrameHandler3(...) 
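; Note on the recurring EH rewrites in this commit: the IR syntax changed so
; that the personality routine is declared once on the function instead of on
; each landingpad instruction, which is exactly the -/+ pattern repeated in
; the hunks above and below. A minimal before/after sketch (the names @f and
; @pers are illustrative, not from this patch):
;
;   ; old form
;   define void @f() {
;     ...
;     %lp = landingpad { i8*, i32 }
;             personality i8* bitcast (i32 (...)* @pers to i8*)
;             cleanup
;   }
;
;   ; new form
;   define void @f() personality i8* bitcast (i32 (...)* @pers to i8*) {
;     ...
;     %lp = landingpad { i8*, i32 }
;             cleanup
;   }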
declare void @llvm.eh.begincatch(i8*, i8*) declare void @llvm.eh.endcatch() +declare i32 @llvm.eh.typeid.for(i8*) -define void @use_except_handler3() { +define internal i32 @catchall_filt() { + ret i32 1 +} + +define void @use_except_handler3() personality i32 (...)* @_except_handler3 { +entry: invoke void @may_throw_or_crash() to label %cont unwind label %catchall cont: ret void catchall: - landingpad { i8*, i32 } personality i32 (...)* @_except_handler3 - catch i8* null - br label %cont + %0 = landingpad { i8*, i32 } + catch i8* bitcast (i32 ()* @catchall_filt to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @catchall_filt to i8*)) #4 + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %cont, label %eh.resume +eh.resume: + resume { i8*, i32 } %0 } ; CHECK-LABEL: _use_except_handler3: @@ -34,15 +45,27 @@ catchall: ; CHECK: movl %[[next]], %fs:0 ; CHECK: retl -define void @use_except_handler4() { +; CHECK: .section .xdata,"dr" +; CHECK-LABEL: L__ehtable$use_except_handler3: +; CHECK-NEXT: .long -1 +; CHECK-NEXT: .long _catchall_filt +; CHECK-NEXT: .long Ltmp{{[0-9]+}} + +define void @use_except_handler4() personality i32 (...)* @_except_handler4 { +entry: invoke void @may_throw_or_crash() to label %cont unwind label %catchall cont: ret void catchall: - landingpad { i8*, i32 } personality i32 (...)* @_except_handler4 - catch i8* null - br label %cont + %0 = landingpad { i8*, i32 } + catch i8* bitcast (i32 ()* @catchall_filt to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @catchall_filt to i8*)) #4 + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %cont, label %eh.resume +eh.resume: + resume { i8*, i32 } %0 } ; CHECK-LABEL: _use_except_handler4: @@ -64,13 +87,23 @@ catchall: ; CHECK: movl %[[next]], %fs:0 ; CHECK: retl -define void @use_CxxFrameHandler3() { +; CHECK: .section .xdata,"dr" +; CHECK-LABEL: L__ehtable$use_except_handler4: +; CHECK-NEXT: .long -2 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 9999 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long -2 +; CHECK-NEXT: .long _catchall_filt +; CHECK-NEXT: .long Ltmp{{[0-9]+}} + +define void @use_CxxFrameHandler3() personality i32 (...)* @__CxxFrameHandler3 { invoke void @may_throw_or_crash() to label %cont unwind label %catchall cont: ret void catchall: - %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__CxxFrameHandler3 + %ehvals = landingpad { i8*, i32 } catch i8* null %ehptr = extractvalue { i8*, i32 } %ehvals, 0 call void @llvm.eh.begincatch(i8* %ehptr, i8* null) @@ -110,3 +143,7 @@ catchall: ; CHECK-LABEL: ___ehhandler$use_CxxFrameHandler3: ; CHECK: movl $L__ehtable$use_CxxFrameHandler3, %eax ; CHECK: jmp ___CxxFrameHandler3 # TAILCALL + +; CHECK: .safeseh __except_handler3 +; CHECK: .safeseh __except_handler4 +; CHECK: .safeseh ___ehhandler$use_CxxFrameHandler3 diff --git a/test/CodeGen/X86/win64_call_epi.ll b/test/CodeGen/X86/win64_call_epi.ll index 71c44b085004..096cbe41c540 100644 --- a/test/CodeGen/X86/win64_call_epi.ll +++ b/test/CodeGen/X86/win64_call_epi.ll @@ -5,7 +5,7 @@ declare void @baz() declare i32 @personality(...) ; Check for 'nop' between the last call and the epilogue. 
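; (Background on the check above, summarizing the test's intent: the Win64
; unwinder treats a PC that falls inside the epilogue as a frame already being
; torn down, so if the return address of the last call landed on the first
; epilogue instruction, unwinding through that frame would misbehave; the nop
; keeps the return address inside the function body proper. The test only
; asserts that the nop survives codegen.)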
-define void @foo1() { +define void @foo1() personality i32 (...)* @personality { invoke void @bar() to label %normal @@ -15,7 +15,7 @@ normal: ret void catch: - %1 = landingpad { i8*, i32 } personality i32 (...)* @personality cleanup + %1 = landingpad { i8*, i32 } cleanup resume { i8*, i32 } %1 } ; WIN64-LABEL: foo1: diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll index d668f43c895e..cb9d026bec2d 100644 --- a/test/CodeGen/X86/win64_eh.ll +++ b/test/CodeGen/X86/win64_eh.ll @@ -101,7 +101,7 @@ declare void @_d_eh_resume_unwind(i8*) declare i32 @bar() -define i32 @foo4() #0 { +define i32 @foo4() #0 personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality { entry: %step = alloca i32, align 4 store i32 0, i32* %step @@ -115,7 +115,7 @@ finally: br label %endtryfinally landingpad: - %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality + %landing_pad = landingpad { i8*, i32 } cleanup %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0 store i32 2, i32* %step diff --git a/test/CodeGen/X86/win_eh_prepare.ll b/test/CodeGen/X86/win_eh_prepare.ll index a33dd92ad72a..3e3f9af05822 100644 --- a/test/CodeGen/X86/win_eh_prepare.ll +++ b/test/CodeGen/X86/win_eh_prepare.ll @@ -11,7 +11,7 @@ declare i32 @__C_specific_handler(...) declare i32 @__gxx_personality_seh0(...) declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind -define i32 @use_seh() { +define i32 @use_seh() personality i32 (...)* @__C_specific_handler { entry: invoke void @maybe_throw() to label %cont unwind label %lpad @@ -20,7 +20,7 @@ cont: ret i32 0 lpad: - %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler + %ehvals = landingpad { i8*, i32 } cleanup catch i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*) %ehsel = extractvalue { i8*, i32 } %ehvals, 1 @@ -51,7 +51,7 @@ define internal i32 @filt_g(i8*, i8*) { ; A MinGW64-ish EH style. It could happen if a binary uses both MSVC CRT and ; mingw CRT and is linked with LTO. -define i32 @use_gcc() { +define i32 @use_gcc() personality i32 (...)* @__gxx_personality_seh0 { entry: invoke void @maybe_throw() to label %cont unwind label %lpad @@ -60,7 +60,7 @@ cont: ret i32 0 lpad: - %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_seh0 + %ehvals = landingpad { i8*, i32 } cleanup catch i8* bitcast (i8** @_ZTIi to i8*) %ehsel = extractvalue { i8*, i32 } %ehvals, 1 diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll index a2c5b3a6eedf..248a9202e997 100644 --- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll +++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll @@ -27,12 +27,8 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwin ; CHECK-NEXT: .long 1 ## 0x1 ; CHECK-NEXT: .long 1 ## 0x1 ; CHECK-LABEL: foo1: -; FIXME: The operation gets scalarized. If/when the compiler learns to better -; use [V]CVTDQ2PD, this will need updated. 
-; CHECK: cvtsi2sdq
-; CHECK: cvtsi2sdq
-; CHECK: cvtsi2sdq
-; CHECK: cvtsi2sdq
+; CHECK: cvtdq2pd
+; CHECK: cvtdq2pd
   %cmp = fcmp oeq <4 x float> %val, %test
   %ext = zext <4 x i1> %cmp to <4 x i32>
   %result = sitofp <4 x i32> %ext to <4 x double>
diff --git a/test/CodeGen/XCore/exception.ll b/test/CodeGen/XCore/exception.ll
index 705c6b42ade7..dd7e012f7378 100644
--- a/test/CodeGen/XCore/exception.ll
+++ b/test/CodeGen/XCore/exception.ll
@@ -47,7 +47,7 @@ entry:
 ; CHECK: entsp 4
 ; CHECK: .cfi_def_cfa_offset 16
 ; CHECK: .cfi_offset 15, 0
-define void @fn_catch() {
+define void @fn_catch() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
 ; N.B. we alloc no variables, hence force compiler to spill
@@ -77,7 +77,7 @@ cont:
 ; CHECK: ldw r6, r0[0]
 ; CHECK: bl __cxa_end_catch
 lpad:
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+  %0 = landingpad { i8*, i32 }
           cleanup
           catch i8* bitcast (i8** @_ZTIi to i8*)
           catch i8* bitcast (i8** @_ZTId to i8*)
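; Appendix (editorial sketch, not part of the patch): the new SSE/AVX check
; lines in the shift tests above all encode one lowering recipe for per-lane
; variable shifts: psllw $5 moves the three interesting shift-amount bits up
; into each byte's sign bit, and pblendvb (SSE4.1/AVX) or pcmpgtb+pand/pandn
; (SSE2) select by that sign bit to conditionally apply shifts of 4, 2 and 1,
; doubling the amount between steps. A hand-written IR model of the <16 x i8>
; shl case, assuming in-range shift amounts (0-7); it illustrates the recipe
; and is not the exact DAG the backend builds:

define <16 x i8> @shl_16i8_model(<16 x i8> %r, <16 x i8> %a) nounwind readnone {
entry:
  ; Like psllw $5: place bit 2 of each shift amount into the lane's sign bit.
  %amt0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ; pblendvb selects by sign bit: lanes with the bit set take the shift-by-4.
  %m0 = icmp slt <16 x i8> %amt0, zeroinitializer
  %s4 = shl <16 x i8> %r, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %r0 = select <16 x i1> %m0, <16 x i8> %s4, <16 x i8> %r
  ; Like paddb %xmm1, %xmm1: doubling exposes bit 1; blend in a shift-by-2.
  %amt1 = add <16 x i8> %amt0, %amt0
  %m1 = icmp slt <16 x i8> %amt1, zeroinitializer
  %s2 = shl <16 x i8> %r0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  %r1 = select <16 x i1> %m1, <16 x i8> %s2, <16 x i8> %r0
  ; Double once more to expose bit 0; blend in the final shift-by-1.
  %amt2 = add <16 x i8> %amt1, %amt1
  %m2 = icmp slt <16 x i8> %amt2, zeroinitializer
  %s1 = shl <16 x i8> %r1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %r2 = select <16 x i1> %m2, <16 x i8> %s1, <16 x i8> %r1
  ret <16 x i8> %r2
}

; The 8 x i16 ashr/lshr variants above follow the same recipe with psraw/psrlw
; steps of 8, 4, 2 and 1 (after psllw $12/$4 positions the amount bits), and
; the byte ashr widens to word lanes via punpck{l,h}bw, shifts there, then
; narrows back with psrlw $8 + packuswb, as the checks show.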