Diffstat (limited to 'test')
-rw-r--r--  test/Analysis/BranchProbabilityInfo/basic.ll | 225
-rw-r--r--  test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll | 27
-rw-r--r--  test/Analysis/ScalarEvolution/or-as-add.ll | 38
-rw-r--r--  test/Bitcode/DIExpression-aggresult.ll | 36
-rw-r--r--  test/Bitcode/DIExpression-aggresult.ll.bc | bin 0 -> 1344 bytes
-rw-r--r--  test/Bitcode/DIExpression-deref.ll | 27
-rw-r--r--  test/Bitcode/DIExpression-deref.ll.bc | bin 0 -> 1012 bytes
-rw-r--r--  test/Bitcode/thinlto-alias.ll | 36
-rw-r--r--  test/Bitcode/thinlto-function-summary-callgraph-pgo.ll | 28
-rw-r--r--  test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll | 48
-rw-r--r--  test/Bitcode/thinlto-function-summary-callgraph.ll | 31
-rw-r--r--  test/Bitcode/thinlto-function-summary-originalnames.ll | 8
-rw-r--r--  test/Bitcode/thinlto-function-summary-refgraph.ll | 59
-rw-r--r--  test/Bitcode/thinlto-function-summary.ll | 25
-rw-r--r--  test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll | 16
-rw-r--r--  test/CodeGen/AArch64/arm64-abi.ll | 4
-rw-r--r--  test/CodeGen/AArch64/nonlazybind.ll | 40
-rw-r--r--  test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll | 1326
-rw-r--r--  test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll | 6
-rw-r--r--  test/CodeGen/AMDGPU/exceed-max-sgprs.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/flat-scratch-reg.ll | 59
-rw-r--r--  test/CodeGen/AMDGPU/frame-index-amdgiz.ll | 55
-rw-r--r--  test/CodeGen/AMDGPU/hsa-func-align.ll | 18
-rw-r--r--  test/CodeGen/AMDGPU/hsa-func.ll | 27
-rw-r--r--  test/CodeGen/AMDGPU/loop_break.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 180
-rw-r--r--  test/CodeGen/AMDGPU/nested-loop-conditions.ll | 23
-rw-r--r--  test/CodeGen/AMDGPU/ret_jump.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/select-vectors.ll | 389
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir | 241
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll | 39
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-isel.ll | 56
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 156
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir | 164
-rw-r--r--  test/CodeGen/ARM/alloc-no-stack-realign.ll | 101
-rw-r--r--  test/CodeGen/ARM/build-attributes.ll | 461
-rw-r--r--  test/CodeGen/ARM/darwin-tls-preserved.ll | 24
-rw-r--r--  test/CodeGen/ARM/divmod-hwdiv.ll | 37
-rw-r--r--  test/CodeGen/ARM/fpoffset_overflow.mir | 94
-rw-r--r--  test/CodeGen/ARM/memcpy-inline.ll | 17
-rw-r--r--  test/CodeGen/ARM/memset-inline.ll | 6
-rw-r--r--  test/CodeGen/ARM/vbits.ll | 560
-rw-r--r--  test/CodeGen/ARM/vector-load.ll | 17
-rw-r--r--  test/CodeGen/ARM/vector-store.ll | 10
-rw-r--r--  test/CodeGen/ARM/vlddup.ll | 17
-rw-r--r--  test/CodeGen/ARM/vldlane.ll | 16
-rw-r--r--  test/CodeGen/ARM/vtbl.ll | 2
-rw-r--r--  test/CodeGen/AVR/alloca.ll | 6
-rw-r--r--  test/CodeGen/AVR/call.ll | 29
-rw-r--r--  test/CodeGen/AVR/directmem.ll | 32
-rw-r--r--  test/CodeGen/AVR/inline-asm/multibyte.ll | 135
-rw-r--r--  test/CodeGen/AVR/varargs.ll | 8
-rw-r--r--  test/CodeGen/Hexagon/addrmode-globoff.mir | 25
-rw-r--r--  test/CodeGen/Mips/msa/shift_constant_pool.ll | 171
-rw-r--r--  test/CodeGen/Mips/msa/shift_no_and.ll | 460
-rw-r--r--  test/CodeGen/PowerPC/andc.ll | 50
-rw-r--r--  test/CodeGen/WebAssembly/returned.ll | 31
-rw-r--r--  test/CodeGen/X86/GlobalISel/X86-regbankselect.mir | 28
-rw-r--r--  test/CodeGen/X86/GlobalISel/binop.ll (renamed from test/CodeGen/X86/GlobalISel/binop-isel.ll) | 0
-rw-r--r--  test/CodeGen/X86/GlobalISel/legalize-constant.mir (renamed from test/CodeGen/X86/GlobalISel/legalize-const.mir) | 0
-rw-r--r--  test/CodeGen/X86/GlobalISel/legalize-trunc.mir | 31
-rw-r--r--  test/CodeGen/X86/GlobalISel/memop.ll (renamed from test/CodeGen/X86/GlobalISel/memop-isel.ll) | 0
-rw-r--r--  test/CodeGen/X86/GlobalISel/select-add.mir | 226
-rw-r--r--  test/CodeGen/X86/GlobalISel/select-frameIndex.mir (renamed from test/CodeGen/X86/GlobalISel/frameIndex-instructionselect.mir) | 0
-rw-r--r--  test/CodeGen/X86/GlobalISel/select-memop.mir (renamed from test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir) | 448
-rw-r--r--  test/CodeGen/X86/GlobalISel/select-sub.mir | 225
-rw-r--r--  test/CodeGen/X86/GlobalISel/select-trunc.mir | 183
-rw-r--r--  test/CodeGen/X86/GlobalISel/trunc.ll | 57
-rw-r--r--  test/CodeGen/X86/MergeConsecutiveStores.ll | 19
-rw-r--r--  test/CodeGen/X86/avx-logic.ll | 36
-rw-r--r--  test/CodeGen/X86/avx512-ext.ll | 8
-rw-r--r--  test/CodeGen/X86/avx512-mask-op.ll | 15
-rw-r--r--  test/CodeGen/X86/bswap_tree.ll | 105
-rw-r--r--  test/CodeGen/X86/bswap_tree2.ll | 150
-rw-r--r--  test/CodeGen/X86/combine-or.ll | 16
-rw-r--r--  test/CodeGen/X86/dbg-baseptr.ll | 75
-rw-r--r--  test/CodeGen/X86/extract-store.ll | 37
-rw-r--r--  test/CodeGen/X86/fp128-extract.ll | 22
-rw-r--r--  test/CodeGen/X86/i64-to-float.ll | 46
-rw-r--r--  test/CodeGen/X86/known-signbits-vector.ll | 28
-rw-r--r--  test/CodeGen/X86/madd.ll | 324
-rw-r--r--  test/CodeGen/X86/merge_store.ll | 31
-rw-r--r--  test/CodeGen/X86/sse-schedule.ll | 2415
-rw-r--r--  test/CodeGen/X86/sse2-schedule.ll | 6039
-rw-r--r--  test/CodeGen/X86/tail-merge-after-mbp.ll | 94
-rw-r--r--  test/CodeGen/X86/tail-merge-after-mbp.mir | 105
-rw-r--r--  test/CodeGen/X86/vector-rotate-128.ll | 11
-rw-r--r--  test/CodeGen/X86/vector-rotate-256.ll | 17
-rw-r--r--  test/CodeGen/X86/x86-16.ll | 9
-rw-r--r--  test/DebugInfo/AMDGPU/code-pointer-size.ll | 73
-rw-r--r--  test/DebugInfo/AMDGPU/dwarfdump-relocs.ll | 72
-rw-r--r--  test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll | 70
-rw-r--r--  test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll | 92
-rw-r--r--  test/DebugInfo/AMDGPU/variable-locations.ll | 4
-rw-r--r--  test/DebugInfo/ARM/selectiondag-deadcode.ll | 2
-rw-r--r--  test/DebugInfo/Generic/block-asan.ll | 2
-rw-r--r--  test/DebugInfo/X86/dbg-declare-arg.ll | 16
-rw-r--r--  test/DebugInfo/X86/dbg_value_direct.ll | 2
-rw-r--r--  test/DebugInfo/X86/debug-info-block-captured-self.ll | 4
-rw-r--r--  test/DebugInfo/X86/dw_op_minus.ll | 13
-rw-r--r--  test/DebugInfo/X86/dw_op_minus_direct.ll | 15
-rw-r--r--  test/DebugInfo/X86/fi-expr.ll | 35
-rw-r--r--  test/DebugInfo/X86/sret.ll | 20
-rw-r--r--  test/Instrumentation/AddressSanitizer/debug_info.ll | 6
-rw-r--r--  test/Instrumentation/SanitizerCoverage/coverage.ll | 32
-rw-r--r--  test/Instrumentation/SanitizerCoverage/tracing.ll | 15
-rw-r--r--  test/MC/AArch64/basic-a64-diagnostics.s | 24
-rw-r--r--  test/MC/AMDGPU/gfx7_asm_all.s | 7217
-rw-r--r--  test/MC/AMDGPU/gfx8_asm_all.s | 7966
-rw-r--r--  test/MC/ARM/assembly-default-build-attributes.s | 43
-rw-r--r--  test/MC/ARM/multi-section-mapping.s | 19
-rw-r--r--  test/TableGen/intrinsic-long-name.td | 2
-rw-r--r--  test/TableGen/intrinsic-varargs.td | 2
-rw-r--r--  test/ThinLTO/X86/autoupgrade.ll | 6
-rw-r--r--  test/ThinLTO/X86/distributed_indexes.ll | 14
-rw-r--r--  test/Transforms/CodeGenPrepare/split-indirect-loop.ll | 37
-rw-r--r--  test/Transforms/GVN/non-integral-pointers.ll | 39
-rw-r--r--  test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll | 15
-rw-r--r--  test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll | 1196
-rw-r--r--  test/Transforms/InstCombine/call-cast-attrs.ll | 29
-rw-r--r--  test/Transforms/InstCombine/constant-fold-math.ll | 18
-rw-r--r--  test/Transforms/InstCombine/div-shift.ll | 15
-rw-r--r--  test/Transforms/InstCombine/div.ll | 39
-rw-r--r--  test/Transforms/InstCombine/pr32686.ll | 23
-rw-r--r--  test/Transforms/InstCombine/rem.ll | 22
-rw-r--r--  test/Transforms/InstCombine/shift.ll | 20
-rw-r--r--  test/Transforms/InstCombine/vector-casts.ll | 6
-rw-r--r--  test/Transforms/InstSimplify/AndOrXor.ll | 20
-rw-r--r--  test/Transforms/InstSimplify/icmp-ranges.ll | 2912
-rw-r--r--  test/Transforms/InstSimplify/shufflevector.ll | 67
-rw-r--r--  test/Transforms/InstSimplify/vector_gep.ll | 26
-rw-r--r--  test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll | 8
-rw-r--r--  test/Transforms/LoopUnroll/peel-loop-negative.ll | 28
-rw-r--r--  test/Transforms/LoopUnroll/peel-loop-not-forced.ll | 149
-rw-r--r--  test/Transforms/NewGVN/non-integral-pointers.ll | 39
-rw-r--r--  test/Transforms/PhaseOrdering/globalaa-retained.ll | 46
-rw-r--r--  test/Transforms/SLPVectorizer/X86/reorder_phi.ll | 54
-rw-r--r--  test/Transforms/SafeStack/X86/debug-loc.ll | 4
-rw-r--r--  test/Transforms/SampleProfile/Inputs/indirect-call.prof | 6
-rw-r--r--  test/Transforms/SampleProfile/indirect-call.ll | 44
-rw-r--r--  test/Transforms/StructurizeCFG/invert-compare.ll | 60
-rw-r--r--  test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll | 12
-rw-r--r--  test/Transforms/StructurizeCFG/post-order-traversal-bug.ll | 3
-rw-r--r--  test/tools/gold/X86/thinlto.ll | 22
-rw-r--r--  test/tools/llvm-lto/thinlto.ll | 8
-rw-r--r--  test/tools/llvm-symbolizer/Inputs/discrim | bin 9973 -> 9056 bytes
-rw-r--r--  test/tools/llvm-symbolizer/Inputs/discrim.c | 13
-rw-r--r--  test/tools/llvm-symbolizer/Inputs/discrim.inp | 9
-rw-r--r--  test/tools/llvm-symbolizer/padding-x86_64.ll | 40
-rw-r--r--  test/tools/llvm-symbolizer/sym-verbose.test | 92
-rw-r--r--  test/tools/llvm-xray/X86/extract-instrmap-symbolize.ll | 10
-rw-r--r--  test/tools/llvm-xray/X86/extract-instrmap.ll | 8
152 files changed, 30425 insertions, 6982 deletions
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index 94ea5a3d1d8ea..84936b7761caa 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -372,3 +372,228 @@ exit:
ret i32 %result
}
+define i32 @test_unreachable_with_prof_greater(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_greater'
+entry:
+ %cond = icmp eq i32 %a, 42
+ br i1 %cond, label %exit, label %unr, !prof !4
+
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> unr probability is 0x00000800 / 0x80000000 = 0.00%
+
+unr:
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+!4 = !{!"branch_weights", i32 0, i32 1}
+
+define i32 @test_unreachable_with_prof_equal(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_equal'
+entry:
+ %cond = icmp eq i32 %a, 42
+ br i1 %cond, label %exit, label %unr, !prof !5
+
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> unr probability is 0x00000800 / 0x80000000 = 0.00%
+
+unr:
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+!5 = !{!"branch_weights", i32 1048575, i32 1}
+
+define i32 @test_unreachable_with_prof_zero(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_zero'
+entry:
+ %cond = icmp eq i32 %a, 42
+ br i1 %cond, label %exit, label %unr, !prof !6
+
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> unr probability is 0x00000800 / 0x80000000 = 0.00%
+
+unr:
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+!6 = !{!"branch_weights", i32 0, i32 0}
+
+define i32 @test_unreachable_with_prof_less(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_less'
+entry:
+ %cond = icmp eq i32 %a, 42
+ br i1 %cond, label %exit, label %unr, !prof !7
+
+; CHECK: edge entry -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> unr probability is 0x00000000 / 0x80000000 = 0.00%
+
+unr:
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+!7 = !{!"branch_weights", i32 1, i32 0}
+
+define i32 @test_unreachable_with_switch_prof1(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof1'
+entry:
+ switch i32 %i, label %case_a [ i32 1, label %case_b
+ i32 2, label %case_c
+ i32 3, label %case_d
+ i32 4, label %case_e ], !prof !8
+; CHECK: edge entry -> case_a probability is 0x00000800 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_b probability is 0x07fffe01 / 0x80000000 = 6.25%
+; CHECK: edge entry -> case_c probability is 0x67fffdff / 0x80000000 = 81.25% [HOT edge]
+; CHECK: edge entry -> case_d probability is 0x07fffdff / 0x80000000 = 6.25%
+; CHECK: edge entry -> case_e probability is 0x07fffdff / 0x80000000 = 6.25%
+
+case_a:
+ unreachable
+
+case_b:
+ br label %exit
+; CHECK: edge case_b -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_c:
+ br label %exit
+; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_d:
+ br label %exit
+; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_e:
+ br label %exit
+; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ %b, %case_b ],
+ [ %c, %case_c ],
+ [ %d, %case_d ],
+ [ %e, %case_e ]
+ ret i32 %result
+}
+
+!8 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+
+define i32 @test_unreachable_with_switch_prof2(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof2'
+entry:
+ switch i32 %i, label %case_a [ i32 1, label %case_b
+ i32 2, label %case_c
+ i32 3, label %case_d
+ i32 4, label %case_e ], !prof !9
+; CHECK: edge entry -> case_a probability is 0x00000400 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_b probability is 0x00000400 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_c probability is 0x6aaaa800 / 0x80000000 = 83.33% [HOT edge]
+; CHECK: edge entry -> case_d probability is 0x0aaaa7ff / 0x80000000 = 8.33%
+; CHECK: edge entry -> case_e probability is 0x0aaaa7ff / 0x80000000 = 8.33%
+
+case_a:
+ unreachable
+
+case_b:
+ unreachable
+
+case_c:
+ br label %exit
+; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_d:
+ br label %exit
+; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_e:
+ br label %exit
+; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ %c, %case_c ],
+ [ %d, %case_d ],
+ [ %e, %case_e ]
+ ret i32 %result
+}
+
+!9 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+
+define i32 @test_unreachable_with_switch_prof3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof3'
+entry:
+ switch i32 %i, label %case_a [ i32 1, label %case_b
+ i32 2, label %case_c
+ i32 3, label %case_d
+ i32 4, label %case_e ], !prof !10
+; CHECK: edge entry -> case_a probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_b probability is 0x00000400 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_c probability is 0x6e08fa2e / 0x80000000 = 85.96% [HOT edge]
+; CHECK: edge entry -> case_d probability is 0x08fb80e9 / 0x80000000 = 7.02%
+; CHECK: edge entry -> case_e probability is 0x08fb80e9 / 0x80000000 = 7.02%
+
+case_a:
+ unreachable
+
+case_b:
+ unreachable
+
+case_c:
+ br label %exit
+; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_d:
+ br label %exit
+; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+case_e:
+ br label %exit
+; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ %c, %case_c ],
+ [ %d, %case_d ],
+ [ %e, %case_e ]
+ ret i32 %result
+}
+
+!10 = !{!"branch_weights", i32 0, i32 4, i32 64, i32 4, i32 4}
+
+define i32 @test_unreachable_with_switch_prof4(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof4'
+entry:
+ switch i32 %i, label %case_a [ i32 1, label %case_b
+ i32 2, label %case_c
+ i32 3, label %case_d
+ i32 4, label %case_e ], !prof !11
+; CHECK: edge entry -> case_a probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge entry -> case_b probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge entry -> case_c probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge entry -> case_d probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge entry -> case_e probability is 0x1999999a / 0x80000000 = 20.00%
+
+case_a:
+ unreachable
+
+case_b:
+ unreachable
+
+case_c:
+ unreachable
+
+case_d:
+ unreachable
+
+case_e:
+ unreachable
+
+}
+
+!11 = !{!"branch_weights", i32 0, i32 4, i32 64, i32 4, i32 4}
diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
index 0acb050c2519e..8ae44387c1da7 100644
--- a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
+++ b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
@@ -1,5 +1,6 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT:
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
@@ -9,7 +10,31 @@
; CHECK: DIVERGENT: float %arg5
; CHECK: DIVERGENT: i32 %arg6
-define amdgpu_ps void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_kernel':
+; CHECK-NOT: %arg0
+; CHECK-NOT: %arg1
+; CHECK-NOT: %arg2
+; CHECK-NOT: %arg3
+; CHECK-NOT: %arg4
+; CHECK-NOT: %arg5
+; CHECK-NOT: %arg6
+define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_c':
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}
diff --git a/test/Analysis/ScalarEvolution/or-as-add.ll b/test/Analysis/ScalarEvolution/or-as-add.ll
new file mode 100644
index 0000000000000..ac4e65a20f218
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/or-as-add.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+declare void @z(i32)
+declare void @z2(i64)
+
+define void @fun(i1 %bool, i32 %x) {
+entry:
+ br label %body
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %bottom_zero = mul i32 %i, 2
+ %a = or i32 %bottom_zero, 1
+ call void @z(i32 %a)
+ %bool_ext = zext i1 %bool to i32
+ %b = or i32 %bool_ext, %bottom_zero
+ call void @z(i32 %b)
+ %shifted = lshr i32 %x, 31
+ %c = or i32 %shifted, %bottom_zero
+ call void @z(i32 %c)
+ %i_ext = zext i32 %i to i64
+ %d = or i64 %i_ext, 4294967296
+ call void @z2(i64 %d)
+ %i.next = add i32 %i, 1
+ %cond = icmp eq i32 %i.next, 10
+ br i1 %cond, label %exit, label %body
+exit:
+ ret void
+}
+
+; CHECK: %a = or i32 %bottom_zero, 1
+; CHECK-NEXT: --> {1,+,2}<%body>
+; CHECK: %b = or i32 %bool_ext, %bottom_zero
+; CHECK-NEXT: --> {(zext i1 %bool to i32),+,2}
+; CHECK: %c = or i32 %shifted, %bottom_zero
+; CHECK-NEXT: --> {(%x /u -2147483648),+,2}<%body>
+; CHECK: %d = or i64 %i_ext, 4294967296
+; CHECK-NEXT: --> {4294967296,+,1}<nuw><nsw><%body>
+
diff --git a/test/Bitcode/DIExpression-aggresult.ll b/test/Bitcode/DIExpression-aggresult.ll
new file mode 100644
index 0000000000000..5ce936d7074da
--- /dev/null
+++ b/test/Bitcode/DIExpression-aggresult.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-dis -o - %s.bc | FileCheck %s
+%class.A = type { i32, i32, i32, i32 }
+
+define void @_Z3fooi(%class.A* sret %agg.result) #0 !dbg !3 {
+ ; CHECK: call void @llvm.dbg.declare({{.*}}, metadata ![[EXPR:[0-9]+]]), !dbg
+ ; CHECK: ![[EXPR]] = !DIExpression()
+ call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !13, metadata !16), !dbg !17
+ ret void, !dbg !17
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { ssp }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{i32 1, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 4, type: !4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6}
+!6 = !DICompositeType(tag: DW_TAG_class_type, name: "A", scope: !0, file: !1, line: 2, size: 128, align: 32, elements: !7)
+!7 = !{!8, !10, !11, !12}
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 32)
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 64)
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "o", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 96)
+!13 = !DILocalVariable(name: "my_a", scope: !14, file: !1, line: 9, type: !15)
+!14 = distinct !DILexicalBlock(scope: !3, file: !1, line: 4, column: 14)
+!15 = !DIDerivedType(tag: DW_TAG_reference_type, file: !1, baseType: !6)
+!16 = !DIExpression(DW_OP_deref)
+!17 = !DILocation(line: 9, column: 5, scope: !3)
diff --git a/test/Bitcode/DIExpression-aggresult.ll.bc b/test/Bitcode/DIExpression-aggresult.ll.bc
new file mode 100644
index 0000000000000..bcf6e175b4d04
--- /dev/null
+++ b/test/Bitcode/DIExpression-aggresult.ll.bc
Binary files differ
diff --git a/test/Bitcode/DIExpression-deref.ll b/test/Bitcode/DIExpression-deref.ll
new file mode 100644
index 0000000000000..3a161b8ee4d29
--- /dev/null
+++ b/test/Bitcode/DIExpression-deref.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-dis -o - %s.bc | FileCheck %s
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!20, !21}
+
+!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang (llvm/trunk 288154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "a.c", directory: "/")
+!3 = !{}
+!4 = !{!10, !11, !12, !13}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; DW_OP_deref should be moved to the back of the expression.
+;
+; CHECK: !DIExpression(DW_OP_plus, 0, DW_OP_deref, DW_OP_LLVM_fragment, 8, 32)
+!6 = !DIExpression(DW_OP_deref, DW_OP_plus, 0, DW_OP_LLVM_fragment, 8, 32)
+; CHECK: !DIExpression(DW_OP_plus, 0, DW_OP_deref)
+!7 = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
+; CHECK: !DIExpression(DW_OP_plus, 1, DW_OP_deref)
+!8 = !DIExpression(DW_OP_plus, 1, DW_OP_deref)
+; CHECK: !DIExpression(DW_OP_deref)
+!9 = !DIExpression(DW_OP_deref)
+!10 = !DIGlobalVariableExpression(var: !0, expr: !6)
+!11 = !DIGlobalVariableExpression(var: !0, expr: !7)
+!12 = !DIGlobalVariableExpression(var: !0, expr: !8)
+!13 = !DIGlobalVariableExpression(var: !0, expr: !9)
+!20 = !{i32 2, !"Dwarf Version", i32 4}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Bitcode/DIExpression-deref.ll.bc b/test/Bitcode/DIExpression-deref.ll.bc
new file mode 100644
index 0000000000000..5297bf9f17b49
--- /dev/null
+++ b/test/Bitcode/DIExpression-deref.ll.bc
Binary files differ
diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll
index cfdf8f7b0bd96..2c235f0620ecb 100644
--- a/test/Bitcode/thinlto-alias.ll
+++ b/test/Bitcode/thinlto-alias.ll
@@ -5,33 +5,31 @@
; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; CHECK: <SOURCE_FILENAME
+; "main"
+; CHECK-NEXT: <FUNCTION op0=0 op1=4
+; "analias"
+; CHECK-NEXT: <FUNCTION op0=4 op1=7
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; See if the call to func is registered, using the expected callsite count
-; and value id matching the subsequent value symbol table.
-; CHECK-NEXT: <PERMODULE {{.*}} op4=[[FUNCID:[0-9]+]]/>
+; See if the call to func is registered.
+; The value id 1 matches the second FUNCTION record above.
+; CHECK-NEXT: <PERMODULE {{.*}} op4=1/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; CHECK-NEXT: <VALUE_SYMTAB
-; CHECK-NEXT: <FNENTRY {{.*}} record string = 'main'
-; External function analias should have entry with value id FUNCID
-; CHECK-NEXT: <ENTRY {{.*}} op0=[[FUNCID]] {{.*}} record string = 'analias'
-; CHECK-NEXT: </VALUE_SYMTAB>
+
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'mainanalias'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
-; See if the call to analias is registered, using the expected callsite count
-; and value id matching the subsequent value symbol table.
-; COMBINED-NEXT: <COMBINED {{.*}} op5=[[ALIASID:[0-9]+]]/>
-; Followed by the alias and aliasee
+; See if the call to analias is registered, using the expected value id.
+; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASID:[0-9]+]] op1=-5751648690987223394/>
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASEEID:[0-9]+]] op1=-1039159065113703048/>
+; COMBINED-NEXT: <COMBINED {{.*}} op5=[[ALIASID]]/>
; COMBINED-NEXT: <COMBINED {{.*}}
-; COMBINED-NEXT: <COMBINED_ALIAS {{.*}} op3=[[ALIASEEID:[0-9]+]]
+; COMBINED-NEXT: <COMBINED_ALIAS {{.*}} op3=[[ALIASEEID]]
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; COMBINED-NEXT: <VALUE_SYMTAB
-; Entry for function func should have entry with value id ALIASID
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op0=[[ALIASID]] op1=-5751648690987223394/>
-; COMBINED-NEXT: <COMBINED
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op0=[[ALIASEEID]] op1=-1039159065113703048/>
-; COMBINED-NEXT: </VALUE_SYMTAB>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
index 713e36dd14d60..7f9d6d95f506b 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -10,31 +10,27 @@
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo.1.bc | FileCheck %s --check-prefix=OLD
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED
+; CHECK: <SOURCE_FILENAME
+; CHECK-NEXT: <FUNCTION
+; "func"
+; CHECK-NEXT: <FUNCTION op0=4 op1=4
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; See if the call to func is registered, using the expected callsite count
-; and hotness type, with value id matching the subsequent value symbol table.
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[FUNCID:[0-9]+]] op5=2/>
+; See if the call to func is registered, using the expected hotness type.
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=2/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; CHECK-NEXT: <VALUE_SYMTAB
-; CHECK-NEXT: <FNENTRY {{.*}} record string = 'main'
-; External function func should have entry with value id FUNCID
-; CHECK-NEXT: <ENTRY {{.*}} op0=[[FUNCID]] {{.*}} record string = 'func'
-; CHECK-NEXT: </VALUE_SYMTAB>
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'mainfunc'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID op0=[[FUNCID:[0-9]+]] op1=7289175272376759421/>
+; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <COMBINED
-; See if the call to func is registered, using the expected callsite count
-; and hotness type, with value id matching the subsequent value symbol table.
+; See if the call to func is registered, using the expected hotness type.
; op6=2 which is hotnessType::None.
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[FUNCID:[0-9]+]] op6=2/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[FUNCID]] op6=2/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; COMBINED-NEXT: <VALUE_SYMTAB
-; Entry for function func should have entry with value id FUNCID
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op0=[[FUNCID]] op1=7289175272376759421/>
-; COMBINED-NEXT: <COMBINED
-; COMBINED-NEXT: </VALUE_SYMTAB>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
index 3a5adea202e2a..982bb5cb7e531 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
@@ -6,27 +6,45 @@
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; CHECK: <SOURCE_FILENAME
+; "hot_function"
+; CHECK-NEXT: <FUNCTION op0=0 op1=12
+; "hot1"
+; CHECK-NEXT: <FUNCTION op0=12 op1=4
+; "hot2"
+; CHECK-NEXT: <FUNCTION op0=16 op1=4
+; "hot3"
+; CHECK-NEXT: <FUNCTION op0=20 op1=4
+; "hot4"
+; CHECK-NEXT: <FUNCTION op0=24 op1=4
+; "cold"
+; CHECK-NEXT: <FUNCTION op0=28 op1=4
+; "none1"
+; CHECK-NEXT: <FUNCTION op0=32 op1=5
+; "none2"
+; CHECK-NEXT: <FUNCTION op0=37 op1=5
+; "none3"
+; CHECK-NEXT: <FUNCTION op0=42 op1=5
; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; See if the call to func is registered, using the expected callsite count
-; and profile count, with value id matching the subsequent value symbol table.
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[HOT1:.*]] op5=3 op6=[[COLD:.*]] op7=1 op8=[[HOT2:.*]] op9=3 op10=[[HOT4:.*]] op11=3 op12=[[NONE1:.*]] op13=2 op14=[[HOT3:.*]] op15=3 op16=[[NONE2:.*]] op17=2 op18=[[NONE3:.*]] op19=2 op20=[[LEGACY:.*]] op21=3/>
+; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
+; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; CHECK-LABEL: <VALUE_SYMTAB
-; CHECK-NEXT: <FNENTRY {{.*}} record string = 'hot_function
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[NONE1]] {{.*}} record string = 'none1'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[COLD]] {{.*}} record string = 'cold'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[NONE2]] {{.*}} record string = 'none2'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[NONE3]] {{.*}} record string = 'none3'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT1]] {{.*}} record string = 'hot1'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT2]] {{.*}} record string = 'hot2'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT3]] {{.*}} record string = 'hot3'
-; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT4]] {{.*}} record string = 'hot4'
-; CHECK-DAG: <COMBINED_ENTRY abbrevid=11 op0=[[LEGACY]] op1=123/>
-; CHECK-LABEL: </VALUE_SYMTAB>
+
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll
index c00907b7fb291..8cc60ad633621 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -10,30 +10,27 @@
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph.1.bc | FileCheck %s --check-prefix=OLD
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED
+; CHECK: <SOURCE_FILENAME
+; CHECK-NEXT: <FUNCTION
+; "func"
+; CHECK-NEXT: <FUNCTION op0=4 op1=4
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; See if the call to func is registered, using the expected callsite count
-; and value id matching the subsequent value symbol table.
-; CHECK-NEXT: <PERMODULE {{.*}} op4=[[FUNCID:[0-9]+]]/>
+; See if the call to func is registered.
+; CHECK-NEXT: <PERMODULE {{.*}} op4=1/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; CHECK-NEXT: <VALUE_SYMTAB
-; CHECK-NEXT: <FNENTRY {{.*}} record string = 'main'
-; External function func should have entry with value id FUNCID
-; CHECK-NEXT: <ENTRY {{.*}} op0=[[FUNCID]] {{.*}} record string = 'func'
-; CHECK-NEXT: </VALUE_SYMTAB>
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'mainfunc'
+
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID op0=[[FUNCID:[0-9]+]] op1=7289175272376759421/>
+; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <COMBINED
-; See if the call to func is registered, using the expected callsite count
-; and value id matching the subsequent value symbol table.
-; COMBINED-NEXT: <COMBINED {{.*}} op5=[[FUNCID:[0-9]+]]/>
+; See if the call to func is registered.
+; COMBINED-NEXT: <COMBINED {{.*}} op5=[[FUNCID]]/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; COMBINED-NEXT: <VALUE_SYMTAB
-; Entry for function func should have entry with value id FUNCID
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op0=[[FUNCID]] op1=7289175272376759421/>
-; COMBINED-NEXT: <COMBINED
-; COMBINED-NEXT: </VALUE_SYMTAB>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -49,4 +46,4 @@ entry:
declare void @func(...) #1
; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
-; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
\ No newline at end of file
+; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
diff --git a/test/Bitcode/thinlto-function-summary-originalnames.ll b/test/Bitcode/thinlto-function-summary-originalnames.ll
index 8777bd9160765..afc9772484ef0 100644
--- a/test/Bitcode/thinlto-function-summary-originalnames.ll
+++ b/test/Bitcode/thinlto-function-summary-originalnames.ll
@@ -5,6 +5,9 @@
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=4947176790635855146/>
+; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=-6591587165810580810/>
+; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=-4377693495213223786/>
; COMBINED-DAG: <COMBINED
; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=6699318081062747564/>
; COMBINED-DAG: <COMBINED_GLOBALVAR_INIT_REFS
@@ -12,11 +15,6 @@
; COMBINED-DAG: <COMBINED_ALIAS
; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=-4170563161550796836/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
-; COMBINED-NEXT: <VALUE_SYMTAB
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op1=4947176790635855146/>
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op1=-6591587165810580810/>
-; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op1=-4377693495213223786/>
-; COMBINED-NEXT: </VALUE_SYMTAB>
source_filename = "/path/to/source.c"
diff --git a/test/Bitcode/thinlto-function-summary-refgraph.ll b/test/Bitcode/thinlto-function-summary-refgraph.ll
index 882f86509ab1a..b52fce7917911 100644
--- a/test/Bitcode/thinlto-function-summary-refgraph.ll
+++ b/test/Bitcode/thinlto-function-summary-refgraph.ll
@@ -2,6 +2,32 @@
; RUN: opt -module-summary %s -o %t.o
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+; CHECK: <SOURCE_FILENAME
+; "bar"
+; CHECK-NEXT: <GLOBALVAR {{.*}} op0=0 op1=3
+; "globalvar"
+; CHECK-NEXT: <GLOBALVAR {{.*}} op0=3 op1=9
+; "func"
+; CHECK-NEXT: <FUNCTION op0=12 op1=4
+; "func2"
+; CHECK-NEXT: <FUNCTION op0=16 op1=5
+; "foo"
+; CHECK-NEXT: <FUNCTION op0=21 op1=3
+; "func3"
+; CHECK-NEXT: <FUNCTION op0=24 op1=5
+; "W"
+; CHECK-NEXT: <FUNCTION op0=29 op1=1
+; "X"
+; CHECK-NEXT: <FUNCTION op0=30 op1=1
+; "Y"
+; CHECK-NEXT: <FUNCTION op0=31 op1=1
+; "Z"
+; CHECK-NEXT: <FUNCTION op0=32 op1=1
+; "llvm.ctpop.i8"
+; CHECK-NEXT: <FUNCTION op0=33 op1=13
+; "main"
+; CHECK-NEXT: <FUNCTION op0=46 op1=4
+
; See if the calls and other references are recorded properly using the
; expected value id and other information as appropriate (callsite count
; for calls). Use different linkage types for the various test cases to
@@ -11,37 +37,32 @@
; llvm.ctpop.i8.
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; Function main contains call to func, as well as address reference to func:
-; CHECK-DAG: <PERMODULE {{.*}} op0=[[MAINID:[0-9]+]] op1=0 {{.*}} op3=1 op4=[[FUNCID:[0-9]+]] op5=[[FUNCID]]/>
+; op0=main op4=func op5=func
+; CHECK-DAG: <PERMODULE {{.*}} op0=11 op1=0 {{.*}} op3=1 op4=2 op5=2/>
; Function W contains a call to func3 as well as a reference to globalvar:
-; CHECK-DAG: <PERMODULE {{.*}} op0=[[WID:[0-9]+]] op1=5 {{.*}} op3=1 op4=[[GLOBALVARID:[0-9]+]] op5=[[FUNC3ID:[0-9]+]]/>
+; op0=W op4=globalvar op5=func3
+; CHECK-DAG: <PERMODULE {{.*}} op0=6 op1=5 {{.*}} op3=1 op4=1 op5=5/>
; Function X contains call to foo, as well as address reference to foo
; which is in the same instruction as the call:
-; CHECK-DAG: <PERMODULE {{.*}} op0=[[XID:[0-9]+]] op1=1 {{.*}} op3=1 op4=[[FOOID:[0-9]+]] op5=[[FOOID]]/>
+; op0=X op4=foo op5=foo
+; CHECK-DAG: <PERMODULE {{.*}} op0=7 op1=1 {{.*}} op3=1 op4=4 op5=4/>
; Function Y contains call to func2, and ensures we don't incorrectly add
; a reference to it when reached while earlier analyzing the phi using its
; return value:
-; CHECK-DAG: <PERMODULE {{.*}} op0=[[YID:[0-9]+]] op1=8 {{.*}} op3=0 op4=[[FUNC2ID:[0-9]+]]/>
+; op0=Y op4=func2
+; CHECK-DAG: <PERMODULE {{.*}} op0=8 op1=8 {{.*}} op3=0 op4=3/>
; Function Z contains call to func2, and ensures we don't incorrectly add
; a reference to it when reached while analyzing subsequent use of its return
; value:
-; CHECK-DAG: <PERMODULE {{.*}} op0=[[ZID:[0-9]+]] op1=3 {{.*}} op3=0 op4=[[FUNC2ID:[0-9]+]]/>
+; op0=Z op4=func2
+; CHECK-DAG: <PERMODULE {{.*}} op0=9 op1=3 {{.*}} op3=0 op4=3/>
; Variable bar initialization contains address reference to func:
-; CHECK-DAG: <PERMODULE_GLOBALVAR_INIT_REFS {{.*}} op0=[[BARID:[0-9]+]] op1=0 op2=[[FUNCID]]/>
+; op0=bar op2=func
+; CHECK-DAG: <PERMODULE_GLOBALVAR_INIT_REFS {{.*}} op0=0 op1=0 op2=2/>
; CHECK: </GLOBALVAL_SUMMARY_BLOCK>
-; CHECK-NEXT: <VALUE_SYMTAB
-; CHECK-DAG: <ENTRY {{.*}} op0=[[BARID]] {{.*}} record string = 'bar'
-; CHECK-DAG: <ENTRY {{.*}} op0=[[FUNCID]] {{.*}} record string = 'func'
-; CHECK-DAG: <ENTRY {{.*}} op0=[[FOOID]] {{.*}} record string = 'foo'
-; CHECK-DAG: <FNENTRY {{.*}} op0=[[MAINID]] {{.*}} record string = 'main'
-; CHECK-DAG: <FNENTRY {{.*}} op0=[[WID]] {{.*}} record string = 'W'
-; CHECK-DAG: <FNENTRY {{.*}} op0=[[XID]] {{.*}} record string = 'X'
-; CHECK-DAG: <FNENTRY {{.*}} op0=[[YID]] {{.*}} record string = 'Y'
-; CHECK-DAG: <FNENTRY {{.*}} op0=[[ZID]] {{.*}} record string = 'Z'
-; CHECK-DAG: <ENTRY {{.*}} op0=[[FUNC2ID]] {{.*}} record string = 'func2'
-; CHECK-DAG: <ENTRY {{.*}} op0=[[FUNC3ID]] {{.*}} record string = 'func3'
-; CHECK-DAG: <ENTRY {{.*}} op0=[[GLOBALVARID]] {{.*}} record string = 'globalvar'
-; CHECK: </VALUE_SYMTAB>
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'barglobalvarfuncfunc2foofunc3WXYZllvm.ctpop.i8main'
; ModuleID = 'thinlto-function-summary-refgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
index ff61b7713f0f4..6b8bfbb292cd2 100644
--- a/test/Bitcode/thinlto-function-summary.ll
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -2,9 +2,19 @@
; RUN: opt -passes=name-anon-globals -module-summary < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC
; Check for summary block/records.
-; Check the value ids in the summary entries against the
-; same in the ValueSumbolTable, to ensure the ordering is stable.
-; Also check the linkage field on the summary entries.
+; BC: <SOURCE_FILENAME
+; "h"
+; BC-NEXT: <GLOBALVAR {{.*}} op0=0 op1=1
+; "foo"
+; BC-NEXT: <FUNCTION op0=1 op1=3
+; "bar"
+; BC-NEXT: <FUNCTION op0=4 op1=3
+; "anon.[32 chars].0"
+; BC-NEXT: <FUNCTION op0=7 op1=39
+; "variadic"
+; BC-NEXT: <FUNCTION op0=46 op1=8
+; "f"
+; BC-NEXT: <ALIAS op0=54 op1=1
; BC: <GLOBALVAL_SUMMARY_BLOCK
; BC-NEXT: <VERSION
; BC-NEXT: <PERMODULE {{.*}} op0=1 op1=0
@@ -13,13 +23,8 @@
; BC-NEXT: <PERMODULE {{.*}} op0=4 op1=16
; BC-NEXT: <ALIAS {{.*}} op0=5 op1=0 op2=3
; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; BC-NEXT: <VALUE_SYMTAB
-; BC-NEXT: <FNENTRY {{.*}} op0=4 {{.*}}> record string = 'variadic'
-; BC-NEXT: <FNENTRY {{.*}} op0=1 {{.*}}> record string = 'foo'
-; BC-NEXT: <FNENTRY {{.*}} op0=2 {{.*}}> record string = 'bar'
-; BC-NEXT: <ENTRY {{.*}} op0=5 {{.*}}> record string = 'f'
-; BC-NEXT: <ENTRY {{.*}} record string = 'h'
-; BC-NEXT: <FNENTRY {{.*}} op0=3 {{.*}}> record string = 'anon.
+; BC: <STRTAB_BLOCK
+; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicf'
; RUN: opt -name-anon-globals -module-summary < %s | llvm-dis | FileCheck %s
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index e40199d82c9dd..71ea9d54f647a 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -154,3 +154,19 @@ continue:
define fp128 @test_quad_dump() {
ret fp128 0xL00000000000000004000000000000000
}
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %vreg0<def>(p0) = G_EXTRACT_VECTOR_ELT %vreg1, %vreg2; (in function: vector_of_pointers_extractelement)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement
+; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement:
+define void @vector_of_pointers_extractelement() {
+ %dummy = extractelement <2 x i16*> undef, i32 0
+ ret void
+}
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %vreg0<def>(<2 x p0>) = G_INSERT_VECTOR_ELT %vreg1, %vreg2, %vreg3; (in function: vector_of_pointers_insertelement
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
+; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
+define void @vector_of_pointers_insertelement() {
+ %dummy = insertelement <2 x i16*> undef, i16* null, i32 0
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index 6cf0ab35b9b52..5be84b7d493b7 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -43,9 +43,7 @@ entry:
; CHECK-LABEL: i8i16caller
; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
; They are i8, i16, i8 and i8.
-; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5]
-; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4]
-; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2]
+; CHECK-DAG: stur {{w[0-9]+}}, [sp, #2]
; CHECK-DAG: strb {{w[0-9]+}}, [sp]
; CHECK: bl
; FAST-LABEL: i8i16caller
diff --git a/test/CodeGen/AArch64/nonlazybind.ll b/test/CodeGen/AArch64/nonlazybind.ll
new file mode 100644
index 0000000000000..4355d45fe84da
--- /dev/null
+++ b/test/CodeGen/AArch64/nonlazybind.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=CHECK-NORMAL
+
+define void @local() nonlazybind {
+ ret void
+}
+
+declare void @nonlocal() nonlazybind
+
+define void @test_laziness() {
+; CHECK-LABEL: test_laziness:
+
+; CHECK: bl _local
+
+; CHECK: adrp x[[TMP:[0-9]+]], _nonlocal@GOTPAGE
+; CHECK: ldr [[FUNC:x[0-9]+]], [x[[TMP]], _nonlocal@GOTPAGEOFF]
+; CHECK: blr [[FUNC]]
+
+; CHECK-NORMAL-LABEL: test_laziness:
+; CHECK-NORMAL: bl _local
+; CHECK-NORMAL: bl _nonlocal
+
+ call void @local()
+ call void @nonlocal()
+ ret void
+}
+
+define void @test_laziness_tail() {
+; CHECK-LABEL: test_laziness_tail:
+
+; CHECK: adrp x[[TMP:[0-9]+]], _nonlocal@GOTPAGE
+; CHECK: ldr [[FUNC:x[0-9]+]], [x[[TMP]], _nonlocal@GOTPAGEOFF]
+; CHECK: br [[FUNC]]
+
+; CHECK-NORMAL-LABEL: test_laziness_tail:
+; CHECK-NORMAL: b _nonlocal
+
+ tail call void @nonlocal()
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
index 95a206e1dd00d..8e5a512dd3c91 100644
--- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
+++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
@@ -3,333 +3,358 @@
; GCN-LABEL: @add_i3(
; SI: %r = add i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @add_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
%r = add i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nsw_i3(
; SI: %r = add nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @add_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
%r = add nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_i3(
; SI: %r = add nuw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @add_nuw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
%r = add nuw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_nsw_i3(
; SI: %r = add nuw nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @add_nuw_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
%r = add nuw nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_i3(
; SI: %r = sub i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @sub_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
%r = sub i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nsw_i3(
; SI: %r = sub nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @sub_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
%r = sub nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_i3(
; SI: %r = sub nuw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @sub_nuw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
%r = sub nuw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_nsw_i3(
; SI: %r = sub nuw nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @sub_nuw_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
%r = sub nuw nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_i3(
; SI: %r = mul i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @mul_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
%r = mul i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nsw_i3(
; SI: %r = mul nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @mul_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
%r = mul nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_i3(
; SI: %r = mul nuw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @mul_nuw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
%r = mul nuw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_nsw_i3(
; SI: %r = mul nuw nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @mul_nuw_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
%r = mul nuw nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @urem_i3(
; SI: %r = urem i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @urem_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @urem_i3(i3 %a, i3 %b) {
%r = urem i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @srem_i3(
; SI: %r = srem i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @srem_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @srem_i3(i3 %a, i3 %b) {
%r = srem i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_i3(
; SI: %r = shl i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @shl_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
%r = shl i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nsw_i3(
; SI: %r = shl nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @shl_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
%r = shl nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_i3(
; SI: %r = shl nuw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @shl_nuw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
%r = shl nuw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_nsw_i3(
; SI: %r = shl nuw nsw i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @shl_nuw_nsw_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
%r = shl nuw nsw i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_i3(
; SI: %r = lshr i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @lshr_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
%r = lshr i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_exact_i3(
; SI: %r = lshr exact i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @lshr_exact_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
%r = lshr exact i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_i3(
; SI: %r = ashr i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @ashr_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
%r = ashr i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_exact_i3(
; SI: %r = ashr exact i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @ashr_exact_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
%r = ashr exact i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @and_i3(
; SI: %r = and i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @and_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
%r = and i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @or_i3(
; SI: %r = or i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @or_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
%r = or i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @xor_i3(
; SI: %r = xor i3 %a, %b
-; SI-NEXT: ret i3 %r
+; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @xor_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
%r = xor i3 %a, %b
- ret i3 %r
+ store volatile i3 %r, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_eq_i3(
; SI: %cmp = icmp eq i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
@@ -337,17 +362,18 @@ define i3 @xor_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_eq_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
%cmp = icmp eq i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ne_i3(
; SI: %cmp = icmp ne i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
@@ -355,17 +381,18 @@ define i3 @select_eq_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_ne_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
%cmp = icmp ne i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ugt_i3(
; SI: %cmp = icmp ugt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
@@ -373,17 +400,18 @@ define i3 @select_ne_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_ugt_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
%cmp = icmp ugt i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_uge_i3(
; SI: %cmp = icmp uge i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
@@ -391,17 +419,18 @@ define i3 @select_ugt_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_uge_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
%cmp = icmp uge i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ult_i3(
; SI: %cmp = icmp ult i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
@@ -409,17 +438,18 @@ define i3 @select_uge_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_ult_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
%cmp = icmp ult i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ule_i3(
; SI: %cmp = icmp ule i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
@@ -427,17 +457,18 @@ define i3 @select_ult_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_ule_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
%cmp = icmp ule i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sgt_i3(
; SI: %cmp = icmp sgt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
@@ -445,17 +476,18 @@ define i3 @select_ule_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_sgt_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
%cmp = icmp sgt i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sge_i3(
; SI: %cmp = icmp sge i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
@@ -463,17 +495,18 @@ define i3 @select_sgt_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_sge_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
%cmp = icmp sge i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_slt_i3(
; SI: %cmp = icmp slt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
@@ -481,17 +514,18 @@ define i3 @select_sge_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_slt_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
%cmp = icmp slt i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sle_i3(
; SI: %cmp = icmp sle i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
-; SI-NEXT: ret i3 %sel
+; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
@@ -499,384 +533,415 @@ define i3 @select_slt_i3(i3 %a, i3 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
-; VI-NEXT: ret i3 %[[SEL_3]]
-define i3 @select_sle_i3(i3 %a, i3 %b) {
+; VI-NEXT: store volatile i3 %[[SEL_3]]
+define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
%cmp = icmp sle i3 %a, %b
%sel = select i1 %cmp, i3 %a, i3 %b
- ret i3 %sel
+ store volatile i3 %sel, i3 addrspace(1)* undef
+ ret void
}
declare i3 @llvm.bitreverse.i3(i3)
; GCN-LABEL: @bitreverse_i3(
; SI: %brev = call i3 @llvm.bitreverse.i3(i3 %a)
-; SI-NEXT: ret i3 %brev
+; SI-NEXT: store volatile i3 %brev
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 29
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[S_32]] to i3
-; VI-NEXT: ret i3 %[[R_3]]
-define i3 @bitreverse_i3(i3 %a) {
+; VI-NEXT: store volatile i3 %[[R_3]]
+define amdgpu_kernel void @bitreverse_i3(i3 %a) {
%brev = call i3 @llvm.bitreverse.i3(i3 %a)
- ret i3 %brev
+ store volatile i3 %brev, i3 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_i16(
; SI: %r = add i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @add_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
%r = add i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @constant_add_i16(
-; VI: ret i16 3
-define i16 @constant_add_i16() {
+; VI: store volatile i16 3
+define amdgpu_kernel void @constant_add_i16() {
%r = add i16 1, 2
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @constant_add_nsw_i16(
-; VI: ret i16 3
-define i16 @constant_add_nsw_i16() {
+; VI: store volatile i16 3
+define amdgpu_kernel void @constant_add_nsw_i16() {
%r = add nsw i16 1, 2
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @constant_add_nuw_i16(
-; VI: ret i16 3
-define i16 @constant_add_nuw_i16() {
+; VI: store volatile i16 3
+define amdgpu_kernel void @constant_add_nuw_i16() {
 %r = add nuw i16 1, 2
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nsw_i16(
; SI: %r = add nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @add_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
%r = add nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_i16(
; SI: %r = add nuw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @add_nuw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
%r = add nuw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_nsw_i16(
; SI: %r = add nuw nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
%r = add nuw nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_i16(
; SI: %r = sub i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @sub_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
%r = sub i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nsw_i16(
; SI: %r = sub nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @sub_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
%r = sub nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_i16(
; SI: %r = sub nuw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @sub_nuw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
%r = sub nuw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_nsw_i16(
; SI: %r = sub nuw nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
%r = sub nuw nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_i16(
; SI: %r = mul i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @mul_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
%r = mul i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nsw_i16(
; SI: %r = mul nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @mul_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
%r = mul nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_i16(
; SI: %r = mul nuw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @mul_nuw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
%r = mul nuw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_nsw_i16(
; SI: %r = mul nuw nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
%r = mul nuw nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @urem_i16(
; SI: %r = urem i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @urem_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @urem_i16(i16 %a, i16 %b) {
%r = urem i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @srem_i16(
; SI: %r = srem i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @srem_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @srem_i16(i16 %a, i16 %b) {
%r = srem i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_i16(
; SI: %r = shl i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @shl_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
%r = shl i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nsw_i16(
; SI: %r = shl nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @shl_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
%r = shl nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_i16(
; SI: %r = shl nuw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @shl_nuw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
%r = shl nuw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_nsw_i16(
; SI: %r = shl nuw nsw i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
%r = shl nuw nsw i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_i16(
; SI: %r = lshr i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @lshr_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
%r = lshr i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_exact_i16(
; SI: %r = lshr exact i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @lshr_exact_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
%r = lshr exact i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_i16(
; SI: %r = ashr i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @ashr_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
%r = ashr i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_exact_i16(
; SI: %r = ashr exact i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @ashr_exact_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
%r = ashr exact i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @constant_lshr_exact_i16(
-; VI: ret i16 2
-define i16 @constant_lshr_exact_i16(i16 %a, i16 %b) {
+; VI: store volatile i16 2
+define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
%r = lshr exact i16 4, 1
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @and_i16(
; SI: %r = and i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @and_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
%r = and i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @or_i16(
; SI: %r = or i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @or_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
%r = or i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @xor_i16(
; SI: %r = xor i16 %a, %b
-; SI-NEXT: ret i16 %r
+; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @xor_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
%r = xor i16 %a, %b
- ret i16 %r
+ store volatile i16 %r, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_eq_i16(
; SI: %cmp = icmp eq i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
@@ -884,17 +949,18 @@ define i16 @xor_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_eq_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
%cmp = icmp eq i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ne_i16(
; SI: %cmp = icmp ne i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
@@ -902,17 +968,18 @@ define i16 @select_eq_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_ne_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
%cmp = icmp ne i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ugt_i16(
; SI: %cmp = icmp ugt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
@@ -920,17 +987,18 @@ define i16 @select_ne_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_ugt_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
%cmp = icmp ugt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_uge_i16(
; SI: %cmp = icmp uge i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
@@ -938,17 +1006,18 @@ define i16 @select_ugt_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_uge_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
%cmp = icmp uge i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ult_i16(
; SI: %cmp = icmp ult i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
@@ -956,17 +1025,18 @@ define i16 @select_uge_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_ult_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
%cmp = icmp ult i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ule_i16(
; SI: %cmp = icmp ule i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
@@ -974,17 +1044,18 @@ define i16 @select_ult_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_ule_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
%cmp = icmp ule i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sgt_i16(
; SI: %cmp = icmp sgt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
@@ -992,17 +1063,18 @@ define i16 @select_ule_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_sgt_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
%cmp = icmp sgt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sge_i16(
; SI: %cmp = icmp sge i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
@@ -1010,17 +1082,18 @@ define i16 @select_sgt_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_sge_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
%cmp = icmp sge i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_slt_i16(
; SI: %cmp = icmp slt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
@@ -1028,17 +1101,18 @@ define i16 @select_sge_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_slt_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
%cmp = icmp slt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sle_i16(
; SI: %cmp = icmp sle i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
-; SI-NEXT: ret i16 %sel
+; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
@@ -1046,356 +1120,384 @@ define i16 @select_slt_i16(i16 %a, i16 %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI-NEXT: ret i16 %[[SEL_16]]
-define i16 @select_sle_i16(i16 %a, i16 %b) {
+; VI-NEXT: store volatile i16 %[[SEL_16]]
+define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
%cmp = icmp sle i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
- ret i16 %sel
+ store volatile i16 %sel, i16 addrspace(1)* undef
+ ret void
}
declare i16 @llvm.bitreverse.i16(i16)
+
; GCN-LABEL: @bitreverse_i16(
; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a)
-; SI-NEXT: ret i16 %brev
+; SI-NEXT: store volatile i16 %brev
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16
-; VI-NEXT: ret i16 %[[R_16]]
-define i16 @bitreverse_i16(i16 %a) {
+; VI-NEXT: store volatile i16 %[[R_16]]
+define amdgpu_kernel void @bitreverse_i16(i16 %a) {
%brev = call i16 @llvm.bitreverse.i16(i16 %a)
- ret i16 %brev
+ store volatile i16 %brev, i16 addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_3xi15(
; SI: %r = add <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = add <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nsw_3xi15(
; SI: %r = add nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = add nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_3xi15(
; SI: %r = add nuw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = add nuw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_nsw_3xi15(
; SI: %r = add nuw nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = add nuw nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_3xi15(
; SI: %r = sub <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = sub <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nsw_3xi15(
; SI: %r = sub nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = sub nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_3xi15(
; SI: %r = sub nuw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = sub nuw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_nsw_3xi15(
; SI: %r = sub nuw nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = sub nuw nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_3xi15(
; SI: %r = mul <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = mul <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nsw_3xi15(
; SI: %r = mul nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = mul nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_3xi15(
; SI: %r = mul nuw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = mul nuw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_nsw_3xi15(
; SI: %r = mul nuw nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = mul nuw nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @urem_3xi15(
; SI: %r = urem <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @urem_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @urem_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = urem <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @srem_3xi15(
; SI: %r = srem <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @srem_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @srem_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = srem <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_3xi15(
; SI: %r = shl <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = shl <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nsw_3xi15(
; SI: %r = shl nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = shl nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_3xi15(
; SI: %r = shl nuw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = shl nuw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_nsw_3xi15(
; SI: %r = shl nuw nsw <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = shl nuw nsw <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_3xi15(
; SI: %r = lshr <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = lshr <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_exact_3xi15(
; SI: %r = lshr exact <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = lshr exact <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_3xi15(
; SI: %r = ashr <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = ashr <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_exact_3xi15(
; SI: %r = ashr exact <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = ashr exact <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @and_3xi15(
; SI: %r = and <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = and <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @or_3xi15(
; SI: %r = or <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = or <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @xor_3xi15(
; SI: %r = xor <3 x i15> %a, %b
-; SI-NEXT: ret <3 x i15> %r
+; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
%r = xor <3 x i15> %a, %b
- ret <3 x i15> %r
+ store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_eq_3xi15(
; SI: %cmp = icmp eq <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1403,17 +1505,18 @@ define <3 x i15> @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp eq <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ne_3xi15(
; SI: %cmp = icmp ne <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1421,17 +1524,18 @@ define <3 x i15> @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp ne <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ugt_3xi15(
; SI: %cmp = icmp ugt <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1439,17 +1543,18 @@ define <3 x i15> @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp ugt <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_uge_3xi15(
; SI: %cmp = icmp uge <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1457,17 +1562,18 @@ define <3 x i15> @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp uge <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ult_3xi15(
; SI: %cmp = icmp ult <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1475,17 +1581,18 @@ define <3 x i15> @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp ult <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ule_3xi15(
; SI: %cmp = icmp ule <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1493,17 +1600,18 @@ define <3 x i15> @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp ule <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sgt_3xi15(
; SI: %cmp = icmp sgt <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1511,17 +1619,18 @@ define <3 x i15> @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp sgt <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sge_3xi15(
; SI: %cmp = icmp sge <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1529,17 +1638,18 @@ define <3 x i15> @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp sge <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_slt_3xi15(
; SI: %cmp = icmp slt <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1547,17 +1657,18 @@ define <3 x i15> @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp slt <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sle_3xi15(
; SI: %cmp = icmp sle <3 x i15> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
-; SI-NEXT: ret <3 x i15> %sel
+; SI-NEXT: store volatile <3 x i15> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1565,356 +1676,383 @@ define <3 x i15> @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[SEL_15]]
-define <3 x i15> @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
+; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
+define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
%cmp = icmp sle <3 x i15> %a, %b
%sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
- ret <3 x i15> %sel
+ store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
+ ret void
}
declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
; GCN-LABEL: @bitreverse_3xi15(
; SI: %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
-; SI-NEXT: ret <3 x i15> %brev
+; SI-NEXT: store volatile <3 x i15> %brev
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 17, i32 17, i32 17>
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i15>
-; VI-NEXT: ret <3 x i15> %[[R_15]]
-define <3 x i15> @bitreverse_3xi15(<3 x i15> %a) {
+; VI-NEXT: store volatile <3 x i15> %[[R_15]]
+define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
%brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
- ret <3 x i15> %brev
+ store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_3xi16(
; SI: %r = add <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nsw_3xi16(
; SI: %r = add nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_3xi16(
; SI: %r = add nuw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nuw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @add_nuw_nsw_3xi16(
; SI: %r = add nuw nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nuw nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_3xi16(
; SI: %r = sub <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nsw_3xi16(
; SI: %r = sub nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_3xi16(
; SI: %r = sub nuw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nuw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @sub_nuw_nsw_3xi16(
; SI: %r = sub nuw nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nuw nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_3xi16(
; SI: %r = mul <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nsw_3xi16(
; SI: %r = mul nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_3xi16(
; SI: %r = mul nuw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nuw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @mul_nuw_nsw_3xi16(
; SI: %r = mul nuw nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nuw nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @urem_3xi16(
; SI: %r = urem <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @urem_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = urem <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @srem_3xi16(
; SI: %r = srem <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @srem_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = srem <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_3xi16(
; SI: %r = shl <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nsw_3xi16(
; SI: %r = shl nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_3xi16(
; SI: %r = shl nuw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nuw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @shl_nuw_nsw_3xi16(
; SI: %r = shl nuw nsw <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nuw nsw <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_3xi16(
; SI: %r = lshr <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = lshr <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @lshr_exact_3xi16(
; SI: %r = lshr exact <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = lshr exact <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_3xi16(
; SI: %r = ashr <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = ashr <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @ashr_exact_3xi16(
; SI: %r = ashr exact <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = ashr exact <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @and_3xi16(
; SI: %r = and <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = and <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @or_3xi16(
; SI: %r = or <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = or <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @xor_3xi16(
; SI: %r = xor <3 x i16> %a, %b
-; SI-NEXT: ret <3 x i16> %r
+; SI-NEXT: store volatile <3 x i16> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = xor <3 x i16> %a, %b
- ret <3 x i16> %r
+ store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_eq_3xi16(
; SI: %cmp = icmp eq <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1922,17 +2060,18 @@ define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp eq <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ne_3xi16(
; SI: %cmp = icmp ne <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1940,17 +2079,18 @@ define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ne <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ugt_3xi16(
; SI: %cmp = icmp ugt <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1958,17 +2098,18 @@ define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ugt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_uge_3xi16(
; SI: %cmp = icmp uge <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1976,17 +2117,18 @@ define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp uge <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ult_3xi16(
; SI: %cmp = icmp ult <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -1994,17 +2136,18 @@ define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ult <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_ule_3xi16(
; SI: %cmp = icmp ule <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -2012,17 +2155,18 @@ define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ule <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sgt_3xi16(
; SI: %cmp = icmp sgt <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -2030,17 +2174,18 @@ define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sgt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sge_3xi16(
; SI: %cmp = icmp sge <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -2048,17 +2193,18 @@ define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sge <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_slt_3xi16(
; SI: %cmp = icmp slt <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -2066,17 +2212,18 @@ define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp slt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
; GCN-LABEL: @select_sle_3xi16(
; SI: %cmp = icmp sle <3 x i16> %a, %b
; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
-; SI-NEXT: ret <3 x i16> %sel
+; SI-NEXT: store volatile <3 x i16> %sel
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
@@ -2084,23 +2231,26 @@ define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[SEL_16]]
-define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
+; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
+define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sle <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
- ret <3 x i16> %sel
+ store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
+ ret void
}
declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
+
; GCN-LABEL: @bitreverse_3xi16(
; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
-; SI-NEXT: ret <3 x i16> %brev
+; SI-NEXT: store volatile <3 x i16> %brev
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16>
; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16>
-; VI-NEXT: ret <3 x i16> %[[R_16]]
-define <3 x i16> @bitreverse_3xi16(<3 x i16> %a) {
+; VI-NEXT: store volatile <3 x i16> %[[R_16]]
+define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
%brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
- ret <3 x i16> %brev
+ store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef
+ ret void
}
diff --git a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
index 88ba310a92cae..a68ddabd95609 100644
--- a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
+++ b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
@@ -1253,8 +1253,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
; NOTES-NEXT: Owner Data size Description
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
-; GFX700: AMD 0x00009171 Unknown note type: (0x0000000a)
-; GFX800: AMD 0x00009190 Unknown note type: (0x0000000a)
-; GFX900: AMD 0x00009171 Unknown note type: (0x0000000a)
+; GFX700: AMD 0x00008b06 Unknown note type: (0x0000000a)
+; GFX800: AMD 0x00008e6a Unknown note type: (0x0000000a)
+; GFX900: AMD 0x00008b06 Unknown note type: (0x0000000a)
; PARSER: AMDGPU Code Object Metadata Parser Test: PASS
diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
index 40d115bfc0606..207dfce75f162 100644
--- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
+++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
@@ -38,7 +38,7 @@ define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
ret void
}
-; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire_flat_scr
+; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr
define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 23f40daf3d237..5705cbc99443a 100644
--- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -44,12 +44,12 @@ entry:
; HSA-VI-NOXNACK: is_xnack_enabled = 0
; HSA-VI-XNACK: is_xnack_enabled = 1
-; CI: ; NumSgprs: 8
-; VI-NOXNACK: ; NumSgprs: 8
-; VI-XNACK: ; NumSgprs: 12
-; HSA-CI: ; NumSgprs: 8
-; HSA-VI-NOXNACK: ; NumSgprs: 8
-; HSA-VI-XNACK: ; NumSgprs: 12
+; CI: ; NumSgprs: 12
+; VI-NOXNACK: ; NumSgprs: 14
+; VI-XNACK: ; NumSgprs: 14
+; HSA-CI: ; NumSgprs: 12
+; HSA-VI-NOXNACK: ; NumSgprs: 14
+; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
@@ -60,14 +60,49 @@ entry:
; HSA-NOXNACK: is_xnack_enabled = 0
; HSA-XNACK: is_xnack_enabled = 1
-; CI: ; NumSgprs: 10
-; VI-NOXNACK: ; NumSgprs: 10
-; VI-XNACK: ; NumSgprs: 12
-; HSA-CI: ; NumSgprs: 10
-; HSA-VI-NOXNACK: ; NumSgprs: 10
-; HSA-VI-XNACK: ; NumSgprs: 12
+; CI: ; NumSgprs: 12
+; VI-NOXNACK: ; NumSgprs: 14
+; VI-XNACK: ; NumSgprs: 14
+; HSA-CI: ; NumSgprs: 12
+; HSA-VI-NOXNACK: ; NumSgprs: 14
+; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
ret void
}
+
+; Make sure the used SGPR count for flat_scr is correct when there is no
+; scratch usage but there are implicit uses of flat_scr.
+
+; GCN-LABEL: {{^}}use_flat_scr:
+; CI: NumSgprs: 4
+; VI-NOXNACK: NumSgprs: 6
+; VI-XNACK: NumSgprs: 6
+define amdgpu_kernel void @use_flat_scr() #0 {
+entry:
+ call void asm sideeffect "; clobber ", "~{FLAT_SCR}"()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_flat_scr_lo:
+; CI: NumSgprs: 4
+; VI-NOXNACK: NumSgprs: 6
+; VI-XNACK: NumSgprs: 6
+define amdgpu_kernel void @use_flat_scr_lo() #0 {
+entry:
+ call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"()
+ ret void
+}
+
+; GCN-LABEL: {{^}}use_flat_scr_hi:
+; CI: NumSgprs: 4
+; VI-NOXNACK: NumSgprs: 6
+; VI-XNACK: NumSgprs: 6
+define amdgpu_kernel void @use_flat_scr_hi() #0 {
+entry:
+ call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"()
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/frame-index-amdgiz.ll b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll
new file mode 100644
index 0000000000000..dd46403b68af1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll
@@ -0,0 +1,55 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+;
+; The original OpenCL kernel:
+; kernel void f(global int *a, int i, int j) {
+; int x[100];
+; x[i] = 7;
+; a[0] = x[j];
+; }
+; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o -
+
+target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+target triple = "amdgcn---amdgiz"
+
+define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 {
+entry:
+; CHECK: s_load_dword s2, s[0:1], 0xb
+; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK: s_load_dword s0, s[0:1], 0xc
+; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; CHECK: s_mov_b32 s10, -1
+; CHECK: s_waitcnt lgkmcnt(0)
+; CHECK: s_lshl_b32 s1, s2, 2
+; CHECK: v_mov_b32_e32 v0, 4
+; CHECK: s_mov_b32 s11, 0xe8f000
+; CHECK: v_add_i32_e32 v1, vcc, s1, v0
+; CHECK: v_mov_b32_e32 v2, 7
+; CHECK: s_lshl_b32 s0, s0, 2
+; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen
+; CHECK: v_add_i32_e32 v0, vcc, s0, v0
+; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen
+; CHECK: s_mov_b32 s7, 0xf000
+; CHECK: s_mov_b32 s6, -1
+; CHECK: s_waitcnt vmcnt(0)
+; CHECK: buffer_store_dword v0, off, s[4:7], 0
+; CHECK: s_endpgm
+
+ %x = alloca [100 x i32], align 4, addrspace(5)
+ %0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i
+ store i32 7, i32 addrspace(5)* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j
+ %1 = load i32, i32 addrspace(5)* %arrayidx2, align 4
+ store i32 %1, i32 addrspace(1)* %a, align 4
+ call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
+ ret void
+}
+
+declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
+
+declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
diff --git a/test/CodeGen/AMDGPU/hsa-func-align.ll b/test/CodeGen/AMDGPU/hsa-func-align.ll
new file mode 100644
index 0000000000000..a00f5e2669d1d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/hsa-func-align.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s
+
+; ELF: Section {
+; ELF: Name: .text
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 32
+; ELF: }
+
+; HSA: .globl simple_align16
+; HSA: .p2align 5
+define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
+entry:
+ %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/hsa-func.ll b/test/CodeGen/AMDGPU/hsa-func.ll
index b4cdd4030d86a..d96b796d44950 100644
--- a/test/CodeGen/AMDGPU/hsa-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-func.ll
@@ -14,6 +14,7 @@
; ELF: Flags [ (0x6)
; ELF: SHF_ALLOC (0x2)
; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 4
; ELF: }
; ELF: SHT_NOTE
@@ -26,7 +27,7 @@
; ELF: Symbol {
; ELF: Name: simple
-; ELF: Size: 292
+; ELF: Size: 44
; ELF: Type: Function (0x2)
; ELF: }
@@ -36,12 +37,13 @@
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-NOT: .amdgpu_hsa_kernel simple
+; HSA: .globl simple
+; HSA: .p2align 2
; HSA: {{^}}simple:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_private_segment_buffer = 1
-; HSA: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: .end_amd_kernel_code_t
-; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
+; HSA-NOT: amd_kernel_code_t
+
+; FIXME: Check that this isn't a kernarg load once the calling convention is implemented.
+; XHSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
; Make sure we are setting the ATC bit:
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
@@ -52,9 +54,20 @@
; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple
-
+; HSA: ; Function info:
+; HSA-NOT: COMPUTE_PGM_RSRC2
define void @simple(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}
+
+; Ignore explicit alignment that is too low.
+; HSA: .globl simple_align2
+; HSA: .p2align 2
+define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
+entry:
+ %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/loop_break.ll b/test/CodeGen/AMDGPU/loop_break.ll
index b9df2cb779ad0..84c42e8bd1e06 100644
--- a/test/CodeGen/AMDGPU/loop_break.ll
+++ b/test/CodeGen/AMDGPU/loop_break.ll
@@ -10,7 +10,7 @@
; OPT: bb4:
; OPT: load volatile
-; OPT: xor i1 %cmp1
+; OPT: %cmp1 = icmp sge i32 %tmp, %load
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow
diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 9d0b6b395996b..4bd8bff4809af 100644
--- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -9,18 +9,19 @@
; StructurizeCFG.
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
-; IR: %2 = extractvalue { i1, i64 } %1, 0
-; IR: %3 = extractvalue { i1, i64 } %1, 1
-; IR: br i1 %2, label %LeafBlock1, label %Flow
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: %1 = extractvalue { i1, i64 } %0, 0
+; IR: %2 = extractvalue { i1, i64 } %0, 1
+; IR: br i1 %1, label %LeafBlock1, label %Flow
; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: %7 = extractvalue { i1, i64 } %6, 0
-; IR: %8 = extractvalue { i1, i64 } %6, 1
-; IR: br i1 %7, label %LeafBlock, label %Flow1
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: %6 = extractvalue { i1, i64 } %5, 0
+; IR: %7 = extractvalue { i1, i64 } %5, 1
+; IR: br i1 %6, label %LeafBlock, label %Flow1
; IR: LeafBlock:
; IR: br label %Flow1
@@ -29,32 +30,32 @@
; IR: br label %Flow{{$}}
; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: %13 = extractvalue { i1, i64 } %12, 0
-; IR: %14 = extractvalue { i1, i64 } %12, 1
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: [[IF:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: %10 = extractvalue { i1, i64 } [[IF]], 0
+; IR: %11 = extractvalue { i1, i64 } [[IF]], 1
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock
; IR: Flow1:
-; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2
; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf(i64 %11)
; IR: ret void
@@ -64,11 +65,9 @@
; GCN: s_xor_b64
-; FIXME: Why is this compare essentially repeated?
-; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
-; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]]
+; GCN: ; %LeafBlock
+; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG:v[0-9]+]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
-; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1
; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
@@ -126,14 +125,15 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedUnreachableBlock
; IR: UnifiedUnreachableBlock:
@@ -181,51 +181,49 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
-; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
+; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1
; IR: {{^}}Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: br i1 %7, label %LeafBlock, label %Flow1
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: br i1 %6, label %LeafBlock, label %Flow1
; IR: {{^}}LeafBlock:
-; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
-; IR: %9 = xor i1 %divergent.cond1, true
+; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1
; IR: br label %Flow1
; IR: LeafBlock1:
-; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
-; IR: %10 = xor i1 %uniform.cond0, true
+; IR: %uniform.cond0 = icmp ne i32 %arg3, 2
; IR: br label %Flow
; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock
; IR: {{^}}Flow1:
-; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2
; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf(i64 %11)
; IR: ret void
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
@@ -264,17 +262,18 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
-; IR: br i1 %2, label %LeafBlock1, label %Flow
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: br i1 %1, label %LeafBlock1, label %Flow
; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
@@ -314,13 +313,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
; IR: Flow2:
-; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
-; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %20)
+; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
+; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %17)
; IR: UnifiedReturnBlock:
-; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %15)
+; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %12)
; IR: ret float %UnifiedRetVal
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
@@ -387,31 +386,32 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %Pivot = icmp sge i32 %tmp16, 2
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
; IR: Flow:
-; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
; IR: Flow2:
-; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
-; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
+; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock
; IR: Flow1:
-; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
-; IR: %18 = extractvalue { i1, i64 } %17, 0
-; IR: %19 = extractvalue { i1, i64 } %17, 1
-; IR: br i1 %18, label %exit1, label %Flow2
+; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR: %15 = extractvalue { i1, i64 } %14, 0
+; IR: %16 = extractvalue { i1, i64 } %14, 1
+; IR: br i1 %15, label %exit1, label %Flow2
; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
@@ -419,7 +419,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-NEXT: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
@@ -475,7 +475,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-NEXT: br label %Flow2
; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
; IR-NEXT: ret void
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
@@ -622,15 +622,15 @@ uniform.ret:
; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
-; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
-; IR: br i1 %8, label %uniform.if, label %Flow2
+; IR: %6 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
+; IR: br i1 %6, label %uniform.if, label %Flow2
; IR: Flow: ; preds = %uniform.then, %uniform.if
-; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
-; IR: br i1 %11, label %uniform.endif, label %uniform.ret0
+; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ]
+; IR: br i1 %7, label %uniform.endif, label %uniform.ret0
; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %5)
; IR-NEXT: ret void
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index 672549c8ea636..c0b4eaff60aac 100644
--- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -133,9 +133,9 @@ bb23: ; preds = %bb10
; IR: Flow1:
; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
-; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
+; IR-NEXT: %13 = phi <4 x i32> [ %28, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %14 = phi i32 [ %29, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %15 = phi i1 [ %30, %Flow6 ], [ false, %bb14 ]
; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
@@ -144,9 +144,9 @@ bb23: ; preds = %bb10
; IR: Flow2:
; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
-; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
+; IR-NEXT: %19 = phi <4 x i32> [ %28, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %20 = phi i32 [ %29, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %21 = phi i1 [ %30, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
@@ -156,16 +156,15 @@ bb23: ; preds = %bb10
; IR: bb21:
; IR: %tmp12 = icmp slt i32 %tmp11, 9
-; IR-NEXT: %27 = xor i1 %tmp12, true
-; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
+; IR-NEXT: %27 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken)
; IR-NEXT: br label %Flow3
; IR: Flow3:
; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
-; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
-; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
-; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
-; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
+; IR-NEXT: %loop.phi9 = phi i64 [ %27, %bb21 ], [ %loop.phi10, %Flow2 ]
+; IR-NEXT: %28 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
+; IR-NEXT: %29 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
+; IR-NEXT: %30 = phi i1 [ %tmp12, %bb21 ], [ %21, %Flow2 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll
index f2fbacbab82e7..748f98a12c591 100644
--- a/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/test/CodeGen/AMDGPU/ret_jump.ll
@@ -56,7 +56,7 @@ ret.bb: ; preds = %else, %main_body
}
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]]
; GCN: ; BB#{{[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll
index 8710fc8c7307b..4b00a48211ecf 100644
--- a/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/test/CodeGen/AMDGPU/select-vectors.ll
@@ -1,69 +1,186 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; Test expansion of scalar selects on vectors.
; Evergreen is not enabled since it seems to have problems with doubles.
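; For illustration (a sketch mirroring the function bodies below): a scalar
; select on a vector is a select whose condition is a single i1 but whose
; operands are vectors, e.g.
;   %cmp = icmp eq i32 %c, 0
;   %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
; With no native vector select, this is expanded to one v_cndmask_b32 per
; 32-bit element, which is what the per-function check lines count.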
+; GCN-LABEL: {{^}}v_select_v2i8:
+; SI: v_cndmask_b32
+; SI-NOT: cndmask
-; FUNC-LABEL: {{^}}select_v4i8:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
+; GFX9: v_cndmask_b32
+; GFX9-NOT: cndmask
+
+; This is worse when i16 is legal but packed i16 vectors are not, because
+; SelectionDAGBuilder changes the select type for some reason.
+; VI: v_cndmask_b32
+; VI: v_cndmask_b32
+define amdgpu_kernel void @v_select_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %a.ptr, <2 x i8> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <2 x i8>, <2 x i8> addrspace(1)* %a.ptr, align 2
+ %b = load <2 x i8>, <2 x i8> addrspace(1)* %b.ptr, align 2
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <2 x i8> %a, <2 x i8> %b
+ store <2 x i8> %select, <2 x i8> addrspace(1)* %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v4i8:
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %a.ptr, <4 x i8> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <4 x i8>, <4 x i8> addrspace(1)* %a.ptr
+ %b = load <4 x i8>, <4 x i8> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
+ store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v8i8:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %a.ptr, <8 x i8> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <8 x i8>, <8 x i8> addrspace(1)* %a.ptr
+ %b = load <8 x i8>, <8 x i8> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <8 x i8> %a, <8 x i8> %b
+ store <8 x i8> %select, <8 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v16i8:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %a.ptr, <16 x i8> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <16 x i8>, <16 x i8> addrspace(1)* %a.ptr
+ %b = load <16 x i8>, <16 x i8> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <16 x i8> %a, <16 x i8> %b
+ store <16 x i8> %select, <16 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_v4i8:
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
%cmp = icmp eq i8 %c, 0
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}select_v4i16:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
+; GCN-LABEL: {{^}}select_v2i16:
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: v_cndmask_b32
+define amdgpu_kernel void @select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b, i32 %c) #0 {
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <2 x i16> %a, <2 x i16> %b
+ store <2 x i16> %select, <2 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v2i16:
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.ptr
+ %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <2 x i16> %a, <2 x i16> %b
+ store <2 x i16> %select, <2 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v3i16:
; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
+; SI: cndmask
+; SI-NOT: cndmask
+
+; GFX9: v_cndmask_b32_e32
+; GFX9: cndmask
+; GFX9-NOT: cndmask
+
+; VI: v_cndmask_b32
+; VI: v_cndmask_b32
+; VI: v_cndmask_b32
+define amdgpu_kernel void @v_select_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.ptr
+ %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <3 x i16> %a, <3 x i16> %b
+ store <3 x i16> %select, <3 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v4i16:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %a.ptr, <4 x i16> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <4 x i16>, <4 x i16> addrspace(1)* %a.ptr
+ %b = load <4 x i16>, <4 x i16> addrspace(1)* %b.ptr
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4
ret void
}
+; GCN-LABEL: {{^}}v_select_v8i16:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %a.ptr, <8 x i16> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <8 x i16>, <8 x i16> addrspace(1)* %a.ptr
+ %b = load <8 x i16>, <8 x i16> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %select, <8 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+
; FIXME: Expansion with bitwise operations may be better if doing a
; vector select with SGPR inputs.
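; For illustration only (a sketch of the bitwise expansion mentioned above,
; shown per 32-bit element; not what the backend currently emits):
;   %m  = sext i1 %cmp to i32      ; all-ones when %cmp is true, else zero
;   %nm = xor i32 %m, -1
;   %ta = and i32 %a, %m
;   %tb = and i32 %b, %nm
;   %s  = or i32 %ta, %tb          ; equivalent to select i1 %cmp, i32 %a, i32 %b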
-; FUNC-LABEL: {{^}}s_select_v2i32:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}s_select_v2i32:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: buffer_store_dwordx2
+define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: {{^}}s_select_v4i32:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: buffer_store_dwordx4
-define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}s_select_v4i32:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: buffer_store_dwordx4
+define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}v_select_v4i32:
-; SI: buffer_load_dwordx4
-; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: buffer_store_dwordx4
+; GCN-LABEL: {{^}}v_select_v4i32:
+; GCN: buffer_load_dwordx4
+; GCN: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: buffer_store_dwordx4
define amdgpu_kernel void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
bb:
%tmp2 = icmp ult i32 %cond, 32
@@ -73,68 +190,68 @@ bb:
ret void
}
-; FUNC-LABEL: {{^}}select_v8i32:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}select_v8i32:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}s_select_v2f32:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
+; GCN-LABEL: {{^}}s_select_v2f32:
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
-; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
-; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
-; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
-; SI-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
+; GCN-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
-; SI: v_cndmask_b32_e32
-; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
-; SI: v_cndmask_b32_e32
-; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
+; GCN: v_cndmask_b32_e32
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
+; GCN: v_cndmask_b32_e32
+; GCN: buffer_store_dwordx2
+define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x float> %a, <2 x float> %b
store <2 x float> %select, <2 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}s_select_v4f32:
-; SI: s_load_dwordx4
-; SI: s_load_dwordx4
-; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+; GCN-LABEL: {{^}}s_select_v4f32:
+; GCN: s_load_dwordx4
+; GCN: s_load_dwordx4
+; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
-; SI: buffer_store_dwordx4
-define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
+; GCN: buffer_store_dwordx4
+define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x float> %a, <4 x float> %b
store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}v_select_v4f32:
-; SI: buffer_load_dwordx4
-; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: buffer_store_dwordx4
+; GCN-LABEL: {{^}}v_select_v4f32:
+; GCN: buffer_load_dwordx4
+; GCN: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: buffer_store_dwordx4
define amdgpu_kernel void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
bb:
%tmp2 = icmp ult i32 %cond, 32
@@ -144,74 +261,112 @@ bb:
ret void
}
-; FUNC-LABEL: {{^}}select_v8f32:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}select_v8f32:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}select_v2f64:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}select_v2f64:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}select_v4f64:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}select_v4f64:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: {{^}}select_v8f64:
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-; SI: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
+; GCN-LABEL: {{^}}select_v8f64:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) #0 {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16
ret void
}
+; GCN-LABEL: {{^}}v_select_v2f16:
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %a.ptr, <2 x half> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <2 x half>, <2 x half> addrspace(1)* %a.ptr
+ %b = load <2 x half>, <2 x half> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <2 x half> %a, <2 x half> %b
+ store <2 x half> %select, <2 x half> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v3f16:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %a.ptr, <3 x half> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <3 x half>, <3 x half> addrspace(1)* %a.ptr
+ %b = load <3 x half>, <3 x half> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <3 x half> %a, <3 x half> %b
+ store <3 x half> %select, <3 x half> addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_select_v4f16:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %a.ptr, <4 x half> addrspace(1)* %b.ptr, i32 %c) #0 {
+ %a = load <4 x half>, <4 x half> addrspace(1)* %a.ptr
+ %b = load <4 x half>, <4 x half> addrspace(1)* %b.ptr
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <4 x half> %a, <4 x half> %b
+ store <4 x half> %select, <4 x half> addrspace(1)* %out, align 4
+ ret void
+}
+
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 66d9033a6d7cb..21c774133f896 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -12,6 +12,15 @@
define void @test_fadd_s32() #0 { ret void }
define void @test_fadd_s64() #0 { ret void }
+ define void @test_sub_s8() { ret void }
+ define void @test_sub_s16() { ret void }
+ define void @test_sub_s32() { ret void }
+
+ define void @test_mul_s8() #1 { ret void }
+ define void @test_mul_s16() #1 { ret void }
+ define void @test_mul_s32() #1 { ret void }
+ define void @test_mulv5_s32() { ret void }
+
define void @test_load_from_stack() { ret void }
define void @test_load_f32() #0 { ret void }
define void @test_load_f64() #0 { ret void }
@@ -24,6 +33,7 @@
define void @test_soft_fp_double() #0 { ret void }
attributes #0 = { "target-features"="+vfp2,-neonfp" }
+ attributes #1 = { "target-features"="+v6" }
...
---
name: test_zext_s1
@@ -297,6 +307,237 @@ body: |
; CHECK: BX_RET 14, _, implicit %d0
...
---
+name: test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gpr
+# CHECK-DAG: id: 1, class: gpr
+# CHECK-DAG: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s8) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s8) = G_SUB %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s8)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gpr
+# CHECK-DAG: id: 1, class: gpr
+# CHECK-DAG: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s16) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s16) = G_SUB %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s16)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_SUB %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gprnopc
+# CHECK-DAG: id: 1, class: gprnopc
+# CHECK-DAG: id: 2, class: gprnopc
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s8) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s8) = G_MUL %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s8)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gprnopc
+# CHECK-DAG: id: 1, class: gprnopc
+# CHECK-DAG: id: 2, class: gprnopc
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s16) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s16) = G_MUL %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s16)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gprnopc
+# CHECK: id: 1, class: gprnopc
+# CHECK: id: 2, class: gprnopc
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_MUL %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mulv5_s32
+# CHECK-LABEL: name: test_mulv5_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gprnopc
+# CHECK: id: 1, class: gprnopc
+# CHECK: id: 2, class: gprnopc
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_MUL %0, %1
+ ; CHECK: early-clobber [[VREGRES:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
name: test_load_from_stack
# CHECK-LABEL: name: test_load_from_stack
legalized: true
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index a7f5ec33bee3c..cf77ce352074d 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -35,6 +35,19 @@ entry:
ret i8 %sum
}
+define i8 @test_sub_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: name: test_sub_i8
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s8) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s8)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %res = sub i8 %x, %y
+ ret i8 %res
+}
+
define signext i8 @test_return_sext_i8(i8 %x) {
; CHECK-LABEL: name: test_return_sext_i8
; CHECK: liveins: %r0
@@ -59,6 +72,19 @@ entry:
ret i16 %sum
}
+define i16 @test_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: name: test_sub_i16
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s16) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s16)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %res = sub i16 %x, %y
+ ret i16 %res
+}
+
define zeroext i16 @test_return_zext_i16(i16 %x) {
; CHECK-LABEL: name: test_return_zext_i16
; CHECK: liveins: %r0
@@ -83,6 +109,19 @@ entry:
ret i32 %sum
}
+define i32 @test_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: name: test_sub_i32
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s32) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s32)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %res = sub i32 %x, %y
+ ret i32 %res
+}
+
define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: name: test_stack_args
; CHECK: fixedStack:
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 236dcbeb84c52..f3ca2915f306e 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v6 -global-isel %s -o - | FileCheck %s
define void @test_void_return() {
; CHECK-LABEL: test_void_return:
@@ -67,6 +67,60 @@ entry:
ret i32 %sum
}
+define i8 @test_sub_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_sub_i8:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = sub i8 %x, %y
+ ret i8 %sum
+}
+
+define i16 @test_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_sub_i16:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = sub i16 %x, %y
+ ret i16 %sum
+}
+
+define i32 @test_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_sub_i32:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = sub i32 %x, %y
+ ret i32 %sum
+}
+
+define i8 @test_mul_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_mul_i8:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = mul i8 %x, %y
+ ret i8 %sum
+}
+
+define i16 @test_mul_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_mul_i16:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = mul i16 %x, %y
+ ret i16 %sum
+}
+
+define i32 @test_mul_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_mul_i32:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = mul i32 %x, %y
+ ret i32 %sum
+}
+
define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: test_stack_args_i32:
; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index cbff7e12fb77c..625d35acf17b9 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -7,6 +7,14 @@
define void @test_add_s16() { ret void }
define void @test_add_s32() { ret void }
+ define void @test_sub_s8() { ret void }
+ define void @test_sub_s16() { ret void }
+ define void @test_sub_s32() { ret void }
+
+ define void @test_mul_s8() { ret void }
+ define void @test_mul_s16() { ret void }
+ define void @test_mul_s32() { ret void }
+
define void @test_load_from_stack() { ret void }
define void @test_legal_loads() #0 { ret void }
define void @test_legal_stores() #0 { ret void }
@@ -139,6 +147,154 @@ body: |
...
---
+name: test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_SUB %0, %1
+ ; G_SUB with s8 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_SUB %0, %1
+ ; G_SUB with s16 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_SUB %0, %1
+ ; G_SUB with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_MUL %0, %1
+ ; G_MUL with s8 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_MUL %0, %1
+ ; G_MUL with s16 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_MUL %0, %1
+ ; G_MUL with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
name: test_load_from_stack
# CHECK-LABEL: name: test_load_from_stack
legalized: false
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index fbf8d81322f8f..e7935832f98a8 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -5,6 +5,14 @@
define void @test_add_s8() { ret void }
define void @test_add_s1() { ret void }
+ define void @test_sub_s32() { ret void }
+ define void @test_sub_s16() { ret void }
+ define void @test_sub_s8() { ret void }
+
+ define void @test_mul_s32() { ret void }
+ define void @test_mul_s16() { ret void }
+ define void @test_mul_s8() { ret void }
+
define void @test_loads() #0 { ret void }
define void @test_stores() #0 { ret void }
@@ -126,6 +134,162 @@ body: |
...
---
+name: test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_SUB %0, %1
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_SUB %0, %1
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_SUB %0, %1
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_MUL %0, %1
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_MUL %0, %1
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_MUL %0, %1
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+
+...
+---
name: test_loads
# CHECK-LABEL: name: test_loads
legalized: true
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 0e077b3aee5a1..64c279b0f2187 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -7,31 +7,32 @@
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
-; CHECK-LABEL: test1
-; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
-; CHECK: add r[[R2:[0-9]+]], r1, #48
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: mov r[[R1:[0-9]+]], sp
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #48
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #32
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
+; CHECK-LABEL: test1:
+; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
+; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov r[[R1:[0-9]+]], #32
+; CHECK: mov r[[R2:[0-9]+]], sp
+; CHECK: mov r[[R3:[0-9]+]], r[[R2]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
@@ -42,30 +43,32 @@ entry:
define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
-; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
-; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: mov r[[R1:[0-9]+]], sp
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #48
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #32
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
+; CHECK-LABEL: test2:
+; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
+; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov r[[R1:[0-9]+]], #32
+; CHECK: mov r[[R2:[0-9]+]], sp
+; CHECK: mov r[[R3:[0-9]+]], r[[R2]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index fc85a3a2e6834..699ef6e92a4ff 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -231,6 +231,11 @@
; V6: .eabi_attribute 6, 6
; V6: .eabi_attribute 8, 1
;; We assume round-to-nearest by default (matches GCC)
+; V6-NOT: .eabi_attribute 27
+; V6-NOT: .eabi_attribute 36
+; V6-NOT: .eabi_attribute 42
+; V6-NOT: .eabi_attribute 44
+; V6-NOT: .eabi_attribute 68
; V6-NOT: .eabi_attribute 19
;; The default choice made by llc is for a V6 CPU without an FPU.
;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -242,13 +247,8 @@
; V6: .eabi_attribute 23, 3
; V6: .eabi_attribute 24, 1
; V6: .eabi_attribute 25, 1
-; V6-NOT: .eabi_attribute 27
; V6-NOT: .eabi_attribute 28
-; V6-NOT: .eabi_attribute 36
; V6: .eabi_attribute 38, 1
-; V6-NOT: .eabi_attribute 42
-; V6-NOT: .eabi_attribute 44
-; V6-NOT: .eabi_attribute 68
; V6-FAST-NOT: .eabi_attribute 19
;; Despite the V6 CPU having no FPU by default, we chose to flush to
@@ -262,9 +262,14 @@
;; We emit 6, 12 for both v6-M and v6S-M. Technically this is incorrect for
;; V6-M, but we don't model the OS extension, so this is fine.
; V6M: .eabi_attribute 6, 12
-; V6M-NOT: .eabi_attribute 7
+; V6M: .eabi_attribute 7, 77
; V6M: .eabi_attribute 8, 0
; V6M: .eabi_attribute 9, 1
+; V6M-NOT: .eabi_attribute 27
+; V6M-NOT: .eabi_attribute 36
+; V6M-NOT: .eabi_attribute 42
+; V6M-NOT: .eabi_attribute 44
+; V6M-NOT: .eabi_attribute 68
; V6M-NOT: .eabi_attribute 19
;; The default choice made by llc is for a V6M CPU without an FPU.
;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -276,13 +281,8 @@
; V6M: .eabi_attribute 23, 3
; V6M: .eabi_attribute 24, 1
; V6M: .eabi_attribute 25, 1
-; V6M-NOT: .eabi_attribute 27
; V6M-NOT: .eabi_attribute 28
-; V6M-NOT: .eabi_attribute 36
; V6M: .eabi_attribute 38, 1
-; V6M-NOT: .eabi_attribute 42
-; V6M-NOT: .eabi_attribute 44
-; V6M-NOT: .eabi_attribute 68
; V6M-FAST-NOT: .eabi_attribute 19
;; Despite the V6M CPU having no FPU by default, we chose to flush to
@@ -298,6 +298,11 @@
; ARM1156T2F-S: .eabi_attribute 8, 1
; ARM1156T2F-S: .eabi_attribute 9, 2
; ARM1156T2F-S: .fpu vfpv2
+; ARM1156T2F-S-NOT: .eabi_attribute 27
+; ARM1156T2F-S-NOT: .eabi_attribute 36
+; ARM1156T2F-S-NOT: .eabi_attribute 42
+; ARM1156T2F-S-NOT: .eabi_attribute 44
+; ARM1156T2F-S-NOT: .eabi_attribute 68
; ARM1156T2F-S-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; ARM1156T2F-S: .eabi_attribute 20, 1
@@ -306,13 +311,8 @@
; ARM1156T2F-S: .eabi_attribute 23, 3
; ARM1156T2F-S: .eabi_attribute 24, 1
; ARM1156T2F-S: .eabi_attribute 25, 1
-; ARM1156T2F-S-NOT: .eabi_attribute 27
; ARM1156T2F-S-NOT: .eabi_attribute 28
-; ARM1156T2F-S-NOT: .eabi_attribute 36
; ARM1156T2F-S: .eabi_attribute 38, 1
-; ARM1156T2F-S-NOT: .eabi_attribute 42
-; ARM1156T2F-S-NOT: .eabi_attribute 44
-; ARM1156T2F-S-NOT: .eabi_attribute 68
; ARM1156T2F-S-FAST-NOT: .eabi_attribute 19
;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally
@@ -327,6 +327,11 @@
; V7M: .eabi_attribute 7, 77
; V7M: .eabi_attribute 8, 0
; V7M: .eabi_attribute 9, 2
+; V7M-NOT: .eabi_attribute 27
+; V7M-NOT: .eabi_attribute 36
+; V7M-NOT: .eabi_attribute 42
+; V7M-NOT: .eabi_attribute 44
+; V7M-NOT: .eabi_attribute 68
; V7M-NOT: .eabi_attribute 19
;; The default choice made by llc is for a V7M CPU without an FPU.
;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -338,13 +343,8 @@
; V7M: .eabi_attribute 23, 3
; V7M: .eabi_attribute 24, 1
; V7M: .eabi_attribute 25, 1
-; V7M-NOT: .eabi_attribute 27
; V7M-NOT: .eabi_attribute 28
-; V7M-NOT: .eabi_attribute 36
; V7M: .eabi_attribute 38, 1
-; V7M-NOT: .eabi_attribute 42
-; V7M-NOT: .eabi_attribute 44
-; V7M-NOT: .eabi_attribute 68
; V7M-FAST-NOT: .eabi_attribute 19
;; Despite the V7M CPU having no FPU by default, we chose to flush
@@ -357,6 +357,11 @@
; V7: .syntax unified
; V7: .eabi_attribute 6, 10
+; V7-NOT: .eabi_attribute 27
+; V7-NOT: .eabi_attribute 36
+; V7-NOT: .eabi_attribute 42
+; V7-NOT: .eabi_attribute 44
+; V7-NOT: .eabi_attribute 68
; V7-NOT: .eabi_attribute 19
;; In safe-maths mode we default to an IEEE 754 compliant choice.
; V7: .eabi_attribute 20, 1
@@ -365,13 +370,8 @@
; V7: .eabi_attribute 23, 3
; V7: .eabi_attribute 24, 1
; V7: .eabi_attribute 25, 1
-; V7-NOT: .eabi_attribute 27
; V7-NOT: .eabi_attribute 28
-; V7-NOT: .eabi_attribute 36
; V7: .eabi_attribute 38, 1
-; V7-NOT: .eabi_attribute 42
-; V7-NOT: .eabi_attribute 44
-; V7-NOT: .eabi_attribute 68
; V7-FAST-NOT: .eabi_attribute 19
;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes
@@ -386,6 +386,9 @@
; V7VE: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile
; V7VE: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use
; V7VE: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use
+; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use
+; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use
+; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use
; V7VE: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use
; V7VE: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal
; V7VE: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions
@@ -393,19 +396,16 @@
; V7VE: .eabi_attribute 24, 1 @ Tag_ABI_align_needed
; V7VE: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved
; V7VE: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
-; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use
-; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use
-; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use
; V8: .syntax unified
; V8: .eabi_attribute 67, "2.09"
; V8: .eabi_attribute 6, 14
+; V8-NOT: .eabi_attribute 44
; V8-NOT: .eabi_attribute 19
; V8: .eabi_attribute 20, 1
; V8: .eabi_attribute 21, 1
; V8-NOT: .eabi_attribute 22
; V8: .eabi_attribute 23, 3
-; V8-NOT: .eabi_attribute 44
; V8-FAST-NOT: .eabi_attribute 19
;; The default does have an FPU, and for V8-A, it flushes preserving sign.
@@ -496,6 +496,30 @@
; CORTEX-A7-FPUV4: .fpu vfpv4
; CORTEX-A7-CHECK-NOT: .eabi_attribute 19
+
+; Tag_FP_HP_extension
+; CORTEX-A7-CHECK: .eabi_attribute 36, 1
+; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36
+; CORTEX-A7-FPUV4: .eabi_attribute 36, 1
+
+; Tag_MPextension_use
+; CORTEX-A7-CHECK: .eabi_attribute 42, 1
+; CORTEX-A7-NOFPU: .eabi_attribute 42, 1
+; CORTEX-A7-FPUV4: .eabi_attribute 42, 1
+
+; Tag_DIV_use
+; CORTEX-A7-CHECK: .eabi_attribute 44, 2
+; CORTEX-A7-NOFPU: .eabi_attribute 44, 2
+; CORTEX-A7-FPUV4: .eabi_attribute 44, 2
+
+; Tag_DSP_extension
+; CORTEX-A7-CHECK-NOT: .eabi_attribute 46
+
+; Tag_Virtualization_use
+; CORTEX-A7-CHECK: .eabi_attribute 68, 3
+; CORTEX-A7-NOFPU: .eabi_attribute 68, 3
+; CORTEX-A7-FPUV4: .eabi_attribute 68, 3
+
; Tag_ABI_FP_denormal
;; We default to IEEE 754 compliance
; CORTEX-A7-CHECK: .eabi_attribute 20, 1
@@ -535,40 +559,20 @@
; CORTEX-A7-NOFPU: .eabi_attribute 25, 1
; CORTEX-A7-FPUV4: .eabi_attribute 25, 1
-; Tag_FP_HP_extension
-; CORTEX-A7-CHECK: .eabi_attribute 36, 1
-; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36
-; CORTEX-A7-FPUV4: .eabi_attribute 36, 1
-
; Tag_FP_16bit_format
; CORTEX-A7-CHECK: .eabi_attribute 38, 1
; CORTEX-A7-NOFPU: .eabi_attribute 38, 1
; CORTEX-A7-FPUV4: .eabi_attribute 38, 1
-; Tag_MPextension_use
-; CORTEX-A7-CHECK: .eabi_attribute 42, 1
-; CORTEX-A7-NOFPU: .eabi_attribute 42, 1
-; CORTEX-A7-FPUV4: .eabi_attribute 42, 1
-
-; Tag_DIV_use
-; CORTEX-A7-CHECK: .eabi_attribute 44, 2
-; CORTEX-A7-NOFPU: .eabi_attribute 44, 2
-; CORTEX-A7-FPUV4: .eabi_attribute 44, 2
-
-; Tag_DSP_extension
-; CORTEX-A7-CHECK-NOT: .eabi_attribute 46
-
-; Tag_Virtualization_use
-; CORTEX-A7-CHECK: .eabi_attribute 68, 3
-; CORTEX-A7-NOFPU: .eabi_attribute 68, 3
-; CORTEX-A7-FPUV4: .eabi_attribute 68, 3
-
; CORTEX-A5-DEFAULT: .cpu cortex-a5
; CORTEX-A5-DEFAULT: .eabi_attribute 6, 10
; CORTEX-A5-DEFAULT: .eabi_attribute 7, 65
; CORTEX-A5-DEFAULT: .eabi_attribute 8, 1
; CORTEX-A5-DEFAULT: .eabi_attribute 9, 2
; CORTEX-A5-DEFAULT: .fpu neon-vfpv4
+; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1
+; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44
+; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1
; CORTEX-A5-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A5-DEFAULT: .eabi_attribute 20, 1
@@ -577,9 +581,6 @@
; CORTEX-A5-DEFAULT: .eabi_attribute 23, 3
; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1
-; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44
-; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1
; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 19
;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math
@@ -595,6 +596,8 @@
; CORTEX-A5-NONEON: .eabi_attribute 8, 1
; CORTEX-A5-NONEON: .eabi_attribute 9, 2
; CORTEX-A5-NONEON: .fpu vfpv4-d16
+; CORTEX-A5-NONEON: .eabi_attribute 42, 1
+; CORTEX-A5-NONEON: .eabi_attribute 68, 1
;; We default to IEEE 754 compliance
; CORTEX-A5-NONEON: .eabi_attribute 20, 1
; CORTEX-A5-NONEON: .eabi_attribute 21, 1
@@ -602,8 +605,6 @@
; CORTEX-A5-NONEON: .eabi_attribute 23, 3
; CORTEX-A5-NONEON: .eabi_attribute 24, 1
; CORTEX-A5-NONEON: .eabi_attribute 25, 1
-; CORTEX-A5-NONEON: .eabi_attribute 42, 1
-; CORTEX-A5-NONEON: .eabi_attribute 68, 1
; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 19
;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math
@@ -619,6 +620,8 @@
; CORTEX-A5-NOFPU: .eabi_attribute 8, 1
; CORTEX-A5-NOFPU: .eabi_attribute 9, 2
; CORTEX-A5-NOFPU-NOT: .fpu
+; CORTEX-A5-NOFPU: .eabi_attribute 42, 1
+; CORTEX-A5-NOFPU: .eabi_attribute 68, 1
; CORTEX-A5-NOFPU-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A5-NOFPU: .eabi_attribute 20, 1
@@ -627,8 +630,6 @@
; CORTEX-A5-NOFPU: .eabi_attribute 23, 3
; CORTEX-A5-NOFPU: .eabi_attribute 24, 1
; CORTEX-A5-NOFPU: .eabi_attribute 25, 1
-; CORTEX-A5-NOFPU: .eabi_attribute 42, 1
-; CORTEX-A5-NOFPU: .eabi_attribute 68, 1
; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
@@ -645,6 +646,11 @@
; CORTEX-A8-SOFT: .eabi_attribute 8, 1
; CORTEX-A8-SOFT: .eabi_attribute 9, 2
; CORTEX-A8-SOFT: .fpu neon
+; CORTEX-A8-SOFT-NOT: .eabi_attribute 27
+; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1
+; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1
+; CORTEX-A8-SOFT-NOT: .eabi_attribute 44
+; CORTEX-A8-SOFT: .eabi_attribute 68, 1
; CORTEX-A8-SOFT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A8-SOFT: .eabi_attribute 20, 1
@@ -653,13 +659,8 @@
; CORTEX-A8-SOFT: .eabi_attribute 23, 3
; CORTEX-A8-SOFT: .eabi_attribute 24, 1
; CORTEX-A8-SOFT: .eabi_attribute 25, 1
-; CORTEX-A8-SOFT-NOT: .eabi_attribute 27
; CORTEX-A8-SOFT-NOT: .eabi_attribute 28
-; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1
; CORTEX-A8-SOFT: .eabi_attribute 38, 1
-; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1
-; CORTEX-A8-SOFT-NOT: .eabi_attribute 44
-; CORTEX-A8-SOFT: .eabi_attribute 68, 1
; CORTEX-A9-SOFT: .cpu cortex-a9
; CORTEX-A9-SOFT: .eabi_attribute 6, 10
@@ -667,6 +668,11 @@
; CORTEX-A9-SOFT: .eabi_attribute 8, 1
; CORTEX-A9-SOFT: .eabi_attribute 9, 2
; CORTEX-A9-SOFT: .fpu neon
+; CORTEX-A9-SOFT-NOT: .eabi_attribute 27
+; CORTEX-A9-SOFT: .eabi_attribute 36, 1
+; CORTEX-A9-SOFT: .eabi_attribute 42, 1
+; CORTEX-A9-SOFT-NOT: .eabi_attribute 44
+; CORTEX-A9-SOFT: .eabi_attribute 68, 1
; CORTEX-A9-SOFT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A9-SOFT: .eabi_attribute 20, 1
@@ -675,13 +681,8 @@
; CORTEX-A9-SOFT: .eabi_attribute 23, 3
; CORTEX-A9-SOFT: .eabi_attribute 24, 1
; CORTEX-A9-SOFT: .eabi_attribute 25, 1
-; CORTEX-A9-SOFT-NOT: .eabi_attribute 27
; CORTEX-A9-SOFT-NOT: .eabi_attribute 28
-; CORTEX-A9-SOFT: .eabi_attribute 36, 1
; CORTEX-A9-SOFT: .eabi_attribute 38, 1
-; CORTEX-A9-SOFT: .eabi_attribute 42, 1
-; CORTEX-A9-SOFT-NOT: .eabi_attribute 44
-; CORTEX-A9-SOFT: .eabi_attribute 68, 1
; CORTEX-A8-SOFT-FAST-NOT: .eabi_attribute 19
; CORTEX-A9-SOFT-FAST-NOT: .eabi_attribute 19
@@ -699,6 +700,10 @@
; CORTEX-A8-HARD: .eabi_attribute 8, 1
; CORTEX-A8-HARD: .eabi_attribute 9, 2
; CORTEX-A8-HARD: .fpu neon
+; CORTEX-A8-HARD-NOT: .eabi_attribute 27
+; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1
+; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1
+; CORTEX-A8-HARD: .eabi_attribute 68, 1
; CORTEX-A8-HARD-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A8-HARD: .eabi_attribute 20, 1
@@ -707,12 +712,8 @@
; CORTEX-A8-HARD: .eabi_attribute 23, 3
; CORTEX-A8-HARD: .eabi_attribute 24, 1
; CORTEX-A8-HARD: .eabi_attribute 25, 1
-; CORTEX-A8-HARD-NOT: .eabi_attribute 27
; CORTEX-A8-HARD: .eabi_attribute 28, 1
-; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1
; CORTEX-A8-HARD: .eabi_attribute 38, 1
-; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1
-; CORTEX-A8-HARD: .eabi_attribute 68, 1
@@ -722,6 +723,10 @@
; CORTEX-A9-HARD: .eabi_attribute 8, 1
; CORTEX-A9-HARD: .eabi_attribute 9, 2
; CORTEX-A9-HARD: .fpu neon
+; CORTEX-A9-HARD-NOT: .eabi_attribute 27
+; CORTEX-A9-HARD: .eabi_attribute 36, 1
+; CORTEX-A9-HARD: .eabi_attribute 42, 1
+; CORTEX-A9-HARD: .eabi_attribute 68, 1
; CORTEX-A9-HARD-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A9-HARD: .eabi_attribute 20, 1
@@ -730,12 +735,8 @@
; CORTEX-A9-HARD: .eabi_attribute 23, 3
; CORTEX-A9-HARD: .eabi_attribute 24, 1
; CORTEX-A9-HARD: .eabi_attribute 25, 1
-; CORTEX-A9-HARD-NOT: .eabi_attribute 27
; CORTEX-A9-HARD: .eabi_attribute 28, 1
-; CORTEX-A9-HARD: .eabi_attribute 36, 1
; CORTEX-A9-HARD: .eabi_attribute 38, 1
-; CORTEX-A9-HARD: .eabi_attribute 42, 1
-; CORTEX-A9-HARD: .eabi_attribute 68, 1
; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 19
;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -759,6 +760,9 @@
; CORTEX-A12-DEFAULT: .eabi_attribute 8, 1
; CORTEX-A12-DEFAULT: .eabi_attribute 9, 2
; CORTEX-A12-DEFAULT: .fpu neon-vfpv4
+; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1
+; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2
+; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3
; CORTEX-A12-DEFAULT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A12-DEFAULT: .eabi_attribute 20, 1
@@ -767,9 +771,6 @@
; CORTEX-A12-DEFAULT: .eabi_attribute 23, 3
; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1
-; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2
-; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3
; CORTEX-A12-DEFAULT-FAST-NOT: .eabi_attribute 19
;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -785,6 +786,9 @@
; CORTEX-A12-NOFPU: .eabi_attribute 8, 1
; CORTEX-A12-NOFPU: .eabi_attribute 9, 2
; CORTEX-A12-NOFPU-NOT: .fpu
+; CORTEX-A12-NOFPU: .eabi_attribute 42, 1
+; CORTEX-A12-NOFPU: .eabi_attribute 44, 2
+; CORTEX-A12-NOFPU: .eabi_attribute 68, 3
; CORTEX-A12-NOFPU-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A12-NOFPU: .eabi_attribute 20, 1
@@ -793,9 +797,6 @@
; CORTEX-A12-NOFPU: .eabi_attribute 23, 3
; CORTEX-A12-NOFPU: .eabi_attribute 24, 1
; CORTEX-A12-NOFPU: .eabi_attribute 25, 1
-; CORTEX-A12-NOFPU: .eabi_attribute 42, 1
-; CORTEX-A12-NOFPU: .eabi_attribute 44, 2
-; CORTEX-A12-NOFPU: .eabi_attribute 68, 3
; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
@@ -812,6 +813,11 @@
; CORTEX-A15: .eabi_attribute 8, 1
; CORTEX-A15: .eabi_attribute 9, 2
; CORTEX-A15: .fpu neon-vfpv4
+; CORTEX-A15-NOT: .eabi_attribute 27
+; CORTEX-A15: .eabi_attribute 36, 1
+; CORTEX-A15: .eabi_attribute 42, 1
+; CORTEX-A15: .eabi_attribute 44, 2
+; CORTEX-A15: .eabi_attribute 68, 3
; CORTEX-A15-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A15: .eabi_attribute 20, 1
@@ -820,13 +826,8 @@
; CORTEX-A15: .eabi_attribute 23, 3
; CORTEX-A15: .eabi_attribute 24, 1
; CORTEX-A15: .eabi_attribute 25, 1
-; CORTEX-A15-NOT: .eabi_attribute 27
; CORTEX-A15-NOT: .eabi_attribute 28
-; CORTEX-A15: .eabi_attribute 36, 1
; CORTEX-A15: .eabi_attribute 38, 1
-; CORTEX-A15: .eabi_attribute 42, 1
-; CORTEX-A15: .eabi_attribute 44, 2
-; CORTEX-A15: .eabi_attribute 68, 3
; CORTEX-A15-FAST-NOT: .eabi_attribute 19
;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -842,6 +843,9 @@
; CORTEX-A17-DEFAULT: .eabi_attribute 8, 1
; CORTEX-A17-DEFAULT: .eabi_attribute 9, 2
; CORTEX-A17-DEFAULT: .fpu neon-vfpv4
+; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1
+; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2
+; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3
; CORTEX-A17-DEFAULT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A17-DEFAULT: .eabi_attribute 20, 1
@@ -850,9 +854,6 @@
; CORTEX-A17-DEFAULT: .eabi_attribute 23, 3
; CORTEX-A17-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A17-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1
-; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2
-; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3
; CORTEX-A17-FAST-NOT: .eabi_attribute 19
;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -868,6 +869,9 @@
; CORTEX-A17-NOFPU: .eabi_attribute 8, 1
; CORTEX-A17-NOFPU: .eabi_attribute 9, 2
; CORTEX-A17-NOFPU-NOT: .fpu
+; CORTEX-A17-NOFPU: .eabi_attribute 42, 1
+; CORTEX-A17-NOFPU: .eabi_attribute 44, 2
+; CORTEX-A17-NOFPU: .eabi_attribute 68, 3
; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A17-NOFPU: .eabi_attribute 20, 1
@@ -876,9 +880,6 @@
; CORTEX-A17-NOFPU: .eabi_attribute 23, 3
; CORTEX-A17-NOFPU: .eabi_attribute 24, 1
; CORTEX-A17-NOFPU: .eabi_attribute 25, 1
-; CORTEX-A17-NOFPU: .eabi_attribute 42, 1
-; CORTEX-A17-NOFPU: .eabi_attribute 44, 2
-; CORTEX-A17-NOFPU: .eabi_attribute 68, 3
; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
@@ -897,25 +898,25 @@
; CORTEX-M0: .cpu cortex-m0
; CORTEX-M0: .eabi_attribute 6, 12
-; CORTEX-M0-NOT: .eabi_attribute 7
+; CORTEX-M0: .eabi_attribute 7, 77
; CORTEX-M0: .eabi_attribute 8, 0
; CORTEX-M0: .eabi_attribute 9, 1
+; CORTEX-M0-NOT: .eabi_attribute 27
+; CORTEX-M0-NOT: .eabi_attribute 36
+; CORTEX-M0: .eabi_attribute 34, 0
+; CORTEX-M0-NOT: .eabi_attribute 42
+; CORTEX-M0-NOT: .eabi_attribute 44
+; CORTEX-M0-NOT: .eabi_attribute 68
; CORTEX-M0-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M0: .eabi_attribute 20, 1
; CORTEX-M0: .eabi_attribute 21, 1
; CORTEX-M0-NOT: .eabi_attribute 22
; CORTEX-M0: .eabi_attribute 23, 3
-; CORTEX-M0: .eabi_attribute 34, 0
; CORTEX-M0: .eabi_attribute 24, 1
; CORTEX-M0: .eabi_attribute 25, 1
-; CORTEX-M0-NOT: .eabi_attribute 27
; CORTEX-M0-NOT: .eabi_attribute 28
-; CORTEX-M0-NOT: .eabi_attribute 36
; CORTEX-M0: .eabi_attribute 38, 1
-; CORTEX-M0-NOT: .eabi_attribute 42
-; CORTEX-M0-NOT: .eabi_attribute 44
-; CORTEX-M0-NOT: .eabi_attribute 68
; CORTEX-M0-FAST-NOT: .eabi_attribute 19
;; Despite the M0 CPU having no FPU in this scenario, we chose to
@@ -930,9 +931,14 @@
; CORTEX-M0PLUS: .cpu cortex-m0plus
; CORTEX-M0PLUS: .eabi_attribute 6, 12
-; CORTEX-M0PLUS-NOT: .eabi_attribute 7
+; CORTEX-M0PLUS: .eabi_attribute 7, 77
; CORTEX-M0PLUS: .eabi_attribute 8, 0
; CORTEX-M0PLUS: .eabi_attribute 9, 1
+; CORTEX-M0PLUS-NOT: .eabi_attribute 27
+; CORTEX-M0PLUS-NOT: .eabi_attribute 36
+; CORTEX-M0PLUS-NOT: .eabi_attribute 42
+; CORTEX-M0PLUS-NOT: .eabi_attribute 44
+; CORTEX-M0PLUS-NOT: .eabi_attribute 68
; CORTEX-M0PLUS-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M0PLUS: .eabi_attribute 20, 1
@@ -941,13 +947,8 @@
; CORTEX-M0PLUS: .eabi_attribute 23, 3
; CORTEX-M0PLUS: .eabi_attribute 24, 1
; CORTEX-M0PLUS: .eabi_attribute 25, 1
-; CORTEX-M0PLUS-NOT: .eabi_attribute 27
; CORTEX-M0PLUS-NOT: .eabi_attribute 28
-; CORTEX-M0PLUS-NOT: .eabi_attribute 36
; CORTEX-M0PLUS: .eabi_attribute 38, 1
-; CORTEX-M0PLUS-NOT: .eabi_attribute 42
-; CORTEX-M0PLUS-NOT: .eabi_attribute 44
-; CORTEX-M0PLUS-NOT: .eabi_attribute 68
; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19
;; Despite the M0+ CPU having no FPU in this scenario, we chose to
@@ -962,9 +963,14 @@
; CORTEX-M1: .cpu cortex-m1
; CORTEX-M1: .eabi_attribute 6, 12
-; CORTEX-M1-NOT: .eabi_attribute 7
+; CORTEX-M1: .eabi_attribute 7, 77
; CORTEX-M1: .eabi_attribute 8, 0
; CORTEX-M1: .eabi_attribute 9, 1
+; CORTEX-M1-NOT: .eabi_attribute 27
+; CORTEX-M1-NOT: .eabi_attribute 36
+; CORTEX-M1-NOT: .eabi_attribute 42
+; CORTEX-M1-NOT: .eabi_attribute 44
+; CORTEX-M1-NOT: .eabi_attribute 68
; CORTEX-M1-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M1: .eabi_attribute 20, 1
@@ -973,13 +979,8 @@
; CORTEX-M1: .eabi_attribute 23, 3
; CORTEX-M1: .eabi_attribute 24, 1
; CORTEX-M1: .eabi_attribute 25, 1
-; CORTEX-M1-NOT: .eabi_attribute 27
; CORTEX-M1-NOT: .eabi_attribute 28
-; CORTEX-M1-NOT: .eabi_attribute 36
; CORTEX-M1: .eabi_attribute 38, 1
-; CORTEX-M1-NOT: .eabi_attribute 42
-; CORTEX-M1-NOT: .eabi_attribute 44
-; CORTEX-M1-NOT: .eabi_attribute 68
; CORTEX-M1-FAST-NOT: .eabi_attribute 19
;; Despite the M1 CPU having no FPU in this scenario, we chose to
@@ -994,9 +995,13 @@
; SC000: .cpu sc000
; SC000: .eabi_attribute 6, 12
-; SC000-NOT: .eabi_attribute 7
+; SC000: .eabi_attribute 7, 77
; SC000: .eabi_attribute 8, 0
; SC000: .eabi_attribute 9, 1
+; SC000-NOT: .eabi_attribute 27
+; SC000-NOT: .eabi_attribute 42
+; SC000-NOT: .eabi_attribute 44
+; SC000-NOT: .eabi_attribute 68
; SC000-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; SC000: .eabi_attribute 20, 1
@@ -1005,13 +1010,8 @@
; SC000: .eabi_attribute 23, 3
; SC000: .eabi_attribute 24, 1
; SC000: .eabi_attribute 25, 1
-; SC000-NOT: .eabi_attribute 27
; SC000-NOT: .eabi_attribute 28
-; SC000-NOT: .eabi_attribute 36
; SC000: .eabi_attribute 38, 1
-; SC000-NOT: .eabi_attribute 42
-; SC000-NOT: .eabi_attribute 44
-; SC000-NOT: .eabi_attribute 68
; SC000-FAST-NOT: .eabi_attribute 19
;; Despite the SC000 CPU having no FPU in this scenario, we chose to
@@ -1029,6 +1029,11 @@
; CORTEX-M3: .eabi_attribute 7, 77
; CORTEX-M3: .eabi_attribute 8, 0
; CORTEX-M3: .eabi_attribute 9, 2
+; CORTEX-M3-NOT: .eabi_attribute 27
+; CORTEX-M3-NOT: .eabi_attribute 36
+; CORTEX-M3-NOT: .eabi_attribute 42
+; CORTEX-M3-NOT: .eabi_attribute 44
+; CORTEX-M3-NOT: .eabi_attribute 68
; CORTEX-M3-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M3: .eabi_attribute 20, 1
@@ -1037,13 +1042,8 @@
; CORTEX-M3: .eabi_attribute 23, 3
; CORTEX-M3: .eabi_attribute 24, 1
; CORTEX-M3: .eabi_attribute 25, 1
-; CORTEX-M3-NOT: .eabi_attribute 27
; CORTEX-M3-NOT: .eabi_attribute 28
-; CORTEX-M3-NOT: .eabi_attribute 36
; CORTEX-M3: .eabi_attribute 38, 1
-; CORTEX-M3-NOT: .eabi_attribute 42
-; CORTEX-M3-NOT: .eabi_attribute 44
-; CORTEX-M3-NOT: .eabi_attribute 68
; CORTEX-M3-FAST-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
@@ -1059,6 +1059,11 @@
; SC300: .eabi_attribute 7, 77
; SC300: .eabi_attribute 8, 0
; SC300: .eabi_attribute 9, 2
+; SC300-NOT: .eabi_attribute 27
+; SC300-NOT: .eabi_attribute 36
+; SC300-NOT: .eabi_attribute 42
+; SC300-NOT: .eabi_attribute 44
+; SC300-NOT: .eabi_attribute 68
; SC300-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; SC300: .eabi_attribute 20, 1
@@ -1067,13 +1072,8 @@
; SC300: .eabi_attribute 23, 3
; SC300: .eabi_attribute 24, 1
; SC300: .eabi_attribute 25, 1
-; SC300-NOT: .eabi_attribute 27
; SC300-NOT: .eabi_attribute 28
-; SC300-NOT: .eabi_attribute 36
; SC300: .eabi_attribute 38, 1
-; SC300-NOT: .eabi_attribute 42
-; SC300-NOT: .eabi_attribute 44
-; SC300-NOT: .eabi_attribute 68
; SC300-FAST-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
@@ -1090,6 +1090,11 @@
; CORTEX-M4-SOFT: .eabi_attribute 8, 0
; CORTEX-M4-SOFT: .eabi_attribute 9, 2
; CORTEX-M4-SOFT: .fpu fpv4-sp-d16
+; CORTEX-M4-SOFT: .eabi_attribute 27, 1
+; CORTEX-M4-SOFT: .eabi_attribute 36, 1
+; CORTEX-M4-SOFT-NOT: .eabi_attribute 42
+; CORTEX-M4-SOFT-NOT: .eabi_attribute 44
+; CORTEX-M4-SOFT-NOT: .eabi_attribute 68
; CORTEX-M4-SOFT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M4-SOFT: .eabi_attribute 20, 1
@@ -1098,13 +1103,8 @@
; CORTEX-M4-SOFT: .eabi_attribute 23, 3
; CORTEX-M4-SOFT: .eabi_attribute 24, 1
; CORTEX-M4-SOFT: .eabi_attribute 25, 1
-; CORTEX-M4-SOFT: .eabi_attribute 27, 1
; CORTEX-M4-SOFT-NOT: .eabi_attribute 28
-; CORTEX-M4-SOFT: .eabi_attribute 36, 1
; CORTEX-M4-SOFT: .eabi_attribute 38, 1
-; CORTEX-M4-SOFT-NOT: .eabi_attribute 42
-; CORTEX-M4-SOFT-NOT: .eabi_attribute 44
-; CORTEX-M4-SOFT-NOT: .eabi_attribute 68
; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 19
;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
@@ -1120,6 +1120,11 @@
; CORTEX-M4-HARD: .eabi_attribute 8, 0
; CORTEX-M4-HARD: .eabi_attribute 9, 2
; CORTEX-M4-HARD: .fpu fpv4-sp-d16
+; CORTEX-M4-HARD: .eabi_attribute 27, 1
+; CORTEX-M4-HARD: .eabi_attribute 36, 1
+; CORTEX-M4-HARD-NOT: .eabi_attribute 42
+; CORTEX-M4-HARD-NOT: .eabi_attribute 44
+; CORTEX-M4-HARD-NOT: .eabi_attribute 68
; CORTEX-M4-HARD-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M4-HARD: .eabi_attribute 20, 1
@@ -1128,13 +1133,8 @@
; CORTEX-M4-HARD: .eabi_attribute 23, 3
; CORTEX-M4-HARD: .eabi_attribute 24, 1
; CORTEX-M4-HARD: .eabi_attribute 25, 1
-; CORTEX-M4-HARD: .eabi_attribute 27, 1
; CORTEX-M4-HARD: .eabi_attribute 28, 1
-; CORTEX-M4-HARD: .eabi_attribute 36, 1
; CORTEX-M4-HARD: .eabi_attribute 38, 1
-; CORTEX-M4-HARD-NOT: .eabi_attribute 42
-; CORTEX-M4-HARD-NOT: .eabi_attribute 44
-; CORTEX-M4-HARD-NOT: .eabi_attribute 68
; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 19
;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
@@ -1152,6 +1152,11 @@
; CORTEX-M7-SOFT-NOT: .fpu
; CORTEX-M7-SINGLE: .fpu fpv5-sp-d16
; CORTEX-M7-DOUBLE: .fpu fpv5-d16
+; CORTEX-M7-SOFT-NOT: .eabi_attribute 27
+; CORTEX-M7-SINGLE: .eabi_attribute 27, 1
+; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
+; CORTEX-M7: .eabi_attribute 36, 1
+; CORTEX-M7-NOT: .eabi_attribute 44
; CORTEX-M7: .eabi_attribute 17, 1
; CORTEX-M7-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
@@ -1161,12 +1166,7 @@
; CORTEX-M7: .eabi_attribute 23, 3
; CORTEX-M7: .eabi_attribute 24, 1
; CORTEX-M7: .eabi_attribute 25, 1
-; CORTEX-M7-SOFT-NOT: .eabi_attribute 27
-; CORTEX-M7-SINGLE: .eabi_attribute 27, 1
-; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
-; CORTEX-M7: .eabi_attribute 36, 1
; CORTEX-M7: .eabi_attribute 38, 1
-; CORTEX-M7-NOT: .eabi_attribute 44
; CORTEX-M7: .eabi_attribute 14, 0
; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19
@@ -1186,6 +1186,10 @@
; CORTEX-R4: .eabi_attribute 8, 1
; CORTEX-R4: .eabi_attribute 9, 2
; CORTEX-R4-NOT: .fpu vfpv3-d16
+; CORTEX-R4-NOT: .eabi_attribute 36
+; CORTEX-R4-NOT: .eabi_attribute 42
+; CORTEX-R4-NOT: .eabi_attribute 44
+; CORTEX-R4-NOT: .eabi_attribute 68
; CORTEX-R4-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-R4: .eabi_attribute 20, 1
@@ -1195,11 +1199,7 @@
; CORTEX-R4: .eabi_attribute 24, 1
; CORTEX-R4: .eabi_attribute 25, 1
; CORTEX-R4-NOT: .eabi_attribute 28
-; CORTEX-R4-NOT: .eabi_attribute 36
; CORTEX-R4: .eabi_attribute 38, 1
-; CORTEX-R4-NOT: .eabi_attribute 42
-; CORTEX-R4-NOT: .eabi_attribute 44
-; CORTEX-R4-NOT: .eabi_attribute 68
; CORTEX-R4F: .cpu cortex-r4f
; CORTEX-R4F: .eabi_attribute 6, 10
@@ -1207,6 +1207,11 @@
; CORTEX-R4F: .eabi_attribute 8, 1
; CORTEX-R4F: .eabi_attribute 9, 2
; CORTEX-R4F: .fpu vfpv3-d16
+; CORTEX-R4F-NOT: .eabi_attribute 27, 1
+; CORTEX-R4F-NOT: .eabi_attribute 36
+; CORTEX-R4F-NOT: .eabi_attribute 42
+; CORTEX-R4F-NOT: .eabi_attribute 44
+; CORTEX-R4F-NOT: .eabi_attribute 68
; CORTEX-R4F-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-R4F: .eabi_attribute 20, 1
@@ -1215,13 +1220,8 @@
; CORTEX-R4F: .eabi_attribute 23, 3
; CORTEX-R4F: .eabi_attribute 24, 1
; CORTEX-R4F: .eabi_attribute 25, 1
-; CORTEX-R4F-NOT: .eabi_attribute 27, 1
; CORTEX-R4F-NOT: .eabi_attribute 28
-; CORTEX-R4F-NOT: .eabi_attribute 36
; CORTEX-R4F: .eabi_attribute 38, 1
-; CORTEX-R4F-NOT: .eabi_attribute 42
-; CORTEX-R4F-NOT: .eabi_attribute 44
-; CORTEX-R4F-NOT: .eabi_attribute 68
; CORTEX-R5: .cpu cortex-r5
; CORTEX-R5: .eabi_attribute 6, 10
@@ -1229,6 +1229,11 @@
; CORTEX-R5: .eabi_attribute 8, 1
; CORTEX-R5: .eabi_attribute 9, 2
; CORTEX-R5: .fpu vfpv3-d16
+; CORTEX-R5-NOT: .eabi_attribute 27, 1
+; CORTEX-R5-NOT: .eabi_attribute 36
+; CORTEX-R5: .eabi_attribute 44, 2
+; CORTEX-R5-NOT: .eabi_attribute 42
+; CORTEX-R5-NOT: .eabi_attribute 68
; CORTEX-R5-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-R5: .eabi_attribute 20, 1
@@ -1237,13 +1242,8 @@
; CORTEX-R5: .eabi_attribute 23, 3
; CORTEX-R5: .eabi_attribute 24, 1
; CORTEX-R5: .eabi_attribute 25, 1
-; CORTEX-R5-NOT: .eabi_attribute 27, 1
; CORTEX-R5-NOT: .eabi_attribute 28
-; CORTEX-R5-NOT: .eabi_attribute 36
; CORTEX-R5: .eabi_attribute 38, 1
-; CORTEX-R5-NOT: .eabi_attribute 42
-; CORTEX-R5: .eabi_attribute 44, 2
-; CORTEX-R5-NOT: .eabi_attribute 68
; CORTEX-R5-FAST-NOT: .eabi_attribute 19
;; The R5 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1258,6 +1258,10 @@
; CORTEX-R7: .eabi_attribute 8, 1
; CORTEX-R7: .eabi_attribute 9, 2
; CORTEX-R7: .fpu vfpv3-d16-fp16
+; CORTEX-R7: .eabi_attribute 36, 1
+; CORTEX-R7: .eabi_attribute 42, 1
+; CORTEX-R7: .eabi_attribute 44, 2
+; CORTEX-R7-NOT: .eabi_attribute 68
; CORTEX-R7-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-R7: .eabi_attribute 20, 1
@@ -1267,11 +1271,7 @@
; CORTEX-R7: .eabi_attribute 24, 1
; CORTEX-R7: .eabi_attribute 25, 1
; CORTEX-R7-NOT: .eabi_attribute 28
-; CORTEX-R7: .eabi_attribute 36, 1
; CORTEX-R7: .eabi_attribute 38, 1
-; CORTEX-R7: .eabi_attribute 42, 1
-; CORTEX-R7: .eabi_attribute 44, 2
-; CORTEX-R7-NOT: .eabi_attribute 68
; CORTEX-R7-FAST-NOT: .eabi_attribute 19
;; The R7 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1286,6 +1286,10 @@
; CORTEX-R8: .eabi_attribute 8, 1
; CORTEX-R8: .eabi_attribute 9, 2
; CORTEX-R8: .fpu vfpv3-d16-fp16
+; CORTEX-R8: .eabi_attribute 36, 1
+; CORTEX-R8: .eabi_attribute 42, 1
+; CORTEX-R8: .eabi_attribute 44, 2
+; CORTEX-R8-NOT: .eabi_attribute 68
; CORTEX-R8-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-R8: .eabi_attribute 20, 1
@@ -1295,11 +1299,7 @@
; CORTEX-R8: .eabi_attribute 24, 1
; CORTEX-R8: .eabi_attribute 25, 1
; CORTEX-R8-NOT: .eabi_attribute 28
-; CORTEX-R8: .eabi_attribute 36, 1
; CORTEX-R8: .eabi_attribute 38, 1
-; CORTEX-R8: .eabi_attribute 42, 1
-; CORTEX-R8: .eabi_attribute 44, 2
-; CORTEX-R8-NOT: .eabi_attribute 68
; CORTEX-R8-FAST-NOT: .eabi_attribute 19
;; The R8 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1315,6 +1315,11 @@
; CORTEX-A32: .eabi_attribute 9, 2
; CORTEX-A32: .fpu crypto-neon-fp-armv8
; CORTEX-A32: .eabi_attribute 12, 3
+; CORTEX-A32-NOT: .eabi_attribute 27
+; CORTEX-A32: .eabi_attribute 36, 1
+; CORTEX-A32: .eabi_attribute 42, 1
+; CORTEX-A32-NOT: .eabi_attribute 44
+; CORTEX-A32: .eabi_attribute 68, 3
; CORTEX-A32-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A32: .eabi_attribute 20, 1
@@ -1323,13 +1328,8 @@
; CORTEX-A32: .eabi_attribute 23, 3
; CORTEX-A32: .eabi_attribute 24, 1
; CORTEX-A32: .eabi_attribute 25, 1
-; CORTEX-A32-NOT: .eabi_attribute 27
; CORTEX-A32-NOT: .eabi_attribute 28
-; CORTEX-A32: .eabi_attribute 36, 1
; CORTEX-A32: .eabi_attribute 38, 1
-; CORTEX-A32: .eabi_attribute 42, 1
-; CORTEX-A32-NOT: .eabi_attribute 44
-; CORTEX-A32: .eabi_attribute 68, 3
; CORTEX-A32-FAST-NOT: .eabi_attribute 19
;; The A32 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1343,20 +1343,20 @@
; CORTEX-M23: .eabi_attribute 7, 77
; CORTEX-M23: .eabi_attribute 8, 0
; CORTEX-M23: .eabi_attribute 9, 3
+; CORTEX-M23-NOT: .eabi_attribute 27
+; CORTEX-M23: .eabi_attribute 34, 1
+; CORTEX-M23-NOT: .eabi_attribute 44
; CORTEX-M23: .eabi_attribute 17, 1
;; We default to IEEE 754 compliance
; CORTEX-M23-NOT: .eabi_attribute 19
; CORTEX-M23: .eabi_attribute 20, 1
; CORTEX-M23: .eabi_attribute 21, 1
; CORTEX-M23: .eabi_attribute 23, 3
-; CORTEX-M23: .eabi_attribute 34, 1
; CORTEX-M23: .eabi_attribute 24, 1
-; CORTEX-M23-NOT: .eabi_attribute 27
; CORTEX-M23-NOT: .eabi_attribute 28
; CORTEX-M23: .eabi_attribute 25, 1
; CORTEX-M23: .eabi_attribute 38, 1
; CORTEX-M23: .eabi_attribute 14, 0
-; CORTEX-M23-NOT: .eabi_attribute 44
; CORTEX-M33: .cpu cortex-m33
; CORTEX-M33: .eabi_attribute 6, 17
@@ -1364,21 +1364,21 @@
; CORTEX-M33: .eabi_attribute 8, 0
; CORTEX-M33: .eabi_attribute 9, 3
; CORTEX-M33: .fpu fpv5-sp-d16
+; CORTEX-M33: .eabi_attribute 27, 1
+; CORTEX-M33: .eabi_attribute 36, 1
+; CORTEX-M33-NOT: .eabi_attribute 44
+; CORTEX-M33: .eabi_attribute 46, 1
+; CORTEX-M33: .eabi_attribute 34, 1
; CORTEX-M33: .eabi_attribute 17, 1
;; We default to IEEE 754 compliance
; CORTEX-M23-NOT: .eabi_attribute 19
; CORTEX-M33: .eabi_attribute 20, 1
; CORTEX-M33: .eabi_attribute 21, 1
; CORTEX-M33: .eabi_attribute 23, 3
-; CORTEX-M33: .eabi_attribute 34, 1
; CORTEX-M33: .eabi_attribute 24, 1
; CORTEX-M33: .eabi_attribute 25, 1
-; CORTEX-M33: .eabi_attribute 27, 1
; CORTEX-M33-NOT: .eabi_attribute 28
-; CORTEX-M33: .eabi_attribute 36, 1
; CORTEX-M33: .eabi_attribute 38, 1
-; CORTEX-M33: .eabi_attribute 46, 1
-; CORTEX-M33-NOT: .eabi_attribute 44
; CORTEX-M33: .eabi_attribute 14, 0
; CORTEX-M33-FAST-NOT: .eabi_attribute 19
@@ -1394,6 +1394,11 @@
; CORTEX-A35: .eabi_attribute 9, 2
; CORTEX-A35: .fpu crypto-neon-fp-armv8
; CORTEX-A35: .eabi_attribute 12, 3
+; CORTEX-A35-NOT: .eabi_attribute 27
+; CORTEX-A35: .eabi_attribute 36, 1
+; CORTEX-A35: .eabi_attribute 42, 1
+; CORTEX-A35-NOT: .eabi_attribute 44
+; CORTEX-A35: .eabi_attribute 68, 3
; CORTEX-A35-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A35: .eabi_attribute 20, 1
@@ -1402,13 +1407,8 @@
; CORTEX-A35: .eabi_attribute 23, 3
; CORTEX-A35: .eabi_attribute 24, 1
; CORTEX-A35: .eabi_attribute 25, 1
-; CORTEX-A35-NOT: .eabi_attribute 27
; CORTEX-A35-NOT: .eabi_attribute 28
-; CORTEX-A35: .eabi_attribute 36, 1
; CORTEX-A35: .eabi_attribute 38, 1
-; CORTEX-A35: .eabi_attribute 42, 1
-; CORTEX-A35-NOT: .eabi_attribute 44
-; CORTEX-A35: .eabi_attribute 68, 3
; CORTEX-A35-FAST-NOT: .eabi_attribute 19
;; The A35 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1424,6 +1424,11 @@
; CORTEX-A53: .eabi_attribute 9, 2
; CORTEX-A53: .fpu crypto-neon-fp-armv8
; CORTEX-A53: .eabi_attribute 12, 3
+; CORTEX-A53-NOT: .eabi_attribute 27
+; CORTEX-A53: .eabi_attribute 36, 1
+; CORTEX-A53: .eabi_attribute 42, 1
+; CORTEX-A53-NOT: .eabi_attribute 44
+; CORTEX-A53: .eabi_attribute 68, 3
; CORTEX-A53-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A53: .eabi_attribute 20, 1
@@ -1432,13 +1437,8 @@
; CORTEX-A53: .eabi_attribute 23, 3
; CORTEX-A53: .eabi_attribute 24, 1
; CORTEX-A53: .eabi_attribute 25, 1
-; CORTEX-A53-NOT: .eabi_attribute 27
; CORTEX-A53-NOT: .eabi_attribute 28
-; CORTEX-A53: .eabi_attribute 36, 1
; CORTEX-A53: .eabi_attribute 38, 1
-; CORTEX-A53: .eabi_attribute 42, 1
-; CORTEX-A53-NOT: .eabi_attribute 44
-; CORTEX-A53: .eabi_attribute 68, 3
; CORTEX-A53-FAST-NOT: .eabi_attribute 19
;; The A53 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1454,6 +1454,11 @@
; CORTEX-A57: .eabi_attribute 9, 2
; CORTEX-A57: .fpu crypto-neon-fp-armv8
; CORTEX-A57: .eabi_attribute 12, 3
+; CORTEX-A57-NOT: .eabi_attribute 27
+; CORTEX-A57: .eabi_attribute 36, 1
+; CORTEX-A57: .eabi_attribute 42, 1
+; CORTEX-A57-NOT: .eabi_attribute 44
+; CORTEX-A57: .eabi_attribute 68, 3
; CORTEX-A57-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A57: .eabi_attribute 20, 1
@@ -1462,13 +1467,8 @@
; CORTEX-A57: .eabi_attribute 23, 3
; CORTEX-A57: .eabi_attribute 24, 1
; CORTEX-A57: .eabi_attribute 25, 1
-; CORTEX-A57-NOT: .eabi_attribute 27
; CORTEX-A57-NOT: .eabi_attribute 28
-; CORTEX-A57: .eabi_attribute 36, 1
; CORTEX-A57: .eabi_attribute 38, 1
-; CORTEX-A57: .eabi_attribute 42, 1
-; CORTEX-A57-NOT: .eabi_attribute 44
-; CORTEX-A57: .eabi_attribute 68, 3
; CORTEX-A57-FAST-NOT: .eabi_attribute 19
;; The A57 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1484,6 +1484,11 @@
; CORTEX-A72: .eabi_attribute 9, 2
; CORTEX-A72: .fpu crypto-neon-fp-armv8
; CORTEX-A72: .eabi_attribute 12, 3
+; CORTEX-A72-NOT: .eabi_attribute 27
+; CORTEX-A72: .eabi_attribute 36, 1
+; CORTEX-A72: .eabi_attribute 42, 1
+; CORTEX-A72-NOT: .eabi_attribute 44
+; CORTEX-A72: .eabi_attribute 68, 3
; CORTEX-A72-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A72: .eabi_attribute 20, 1
@@ -1492,13 +1497,8 @@
; CORTEX-A72: .eabi_attribute 23, 3
; CORTEX-A72: .eabi_attribute 24, 1
; CORTEX-A72: .eabi_attribute 25, 1
-; CORTEX-A72-NOT: .eabi_attribute 27
; CORTEX-A72-NOT: .eabi_attribute 28
-; CORTEX-A72: .eabi_attribute 36, 1
; CORTEX-A72: .eabi_attribute 38, 1
-; CORTEX-A72: .eabi_attribute 42, 1
-; CORTEX-A72-NOT: .eabi_attribute 44
-; CORTEX-A72: .eabi_attribute 68, 3
; CORTEX-A72-FAST-NOT: .eabi_attribute 19
;; The A72 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1514,6 +1514,11 @@
; CORTEX-A73: .eabi_attribute 9, 2
; CORTEX-A73: .fpu crypto-neon-fp-armv8
; CORTEX-A73: .eabi_attribute 12, 3
+; CORTEX-A73-NOT: .eabi_attribute 27
+; CORTEX-A73: .eabi_attribute 36, 1
+; CORTEX-A73: .eabi_attribute 42, 1
+; CORTEX-A73-NOT: .eabi_attribute 44
+; CORTEX-A73: .eabi_attribute 68, 3
; CORTEX-A73-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-A73: .eabi_attribute 20, 1
@@ -1522,14 +1527,9 @@
; CORTEX-A73: .eabi_attribute 23, 3
; CORTEX-A73: .eabi_attribute 24, 1
; CORTEX-A73: .eabi_attribute 25, 1
-; CORTEX-A73-NOT: .eabi_attribute 27
; CORTEX-A73-NOT: .eabi_attribute 28
-; CORTEX-A73: .eabi_attribute 36, 1
; CORTEX-A73: .eabi_attribute 38, 1
-; CORTEX-A73: .eabi_attribute 42, 1
-; CORTEX-A73-NOT: .eabi_attribute 44
; CORTEX-A73: .eabi_attribute 14, 0
-; CORTEX-A73: .eabi_attribute 68, 3
; EXYNOS-M1: .cpu exynos-m1
; EXYNOS-M1: .eabi_attribute 6, 14
@@ -1538,6 +1538,11 @@
; EXYNOS-M1: .eabi_attribute 9, 2
; EXYNOS-M1: .fpu crypto-neon-fp-armv8
; EXYNOS-M1: .eabi_attribute 12, 3
+; EXYNOS-M1-NOT: .eabi_attribute 27
+; EXYNOS-M1: .eabi_attribute 36, 1
+; EXYNOS-M1: .eabi_attribute 42, 1
+; EXYNOS-M1-NOT: .eabi_attribute 44
+; EXYNOS-M1: .eabi_attribute 68, 3
; EXYNOS-M1-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; EXYNOS-M1: .eabi_attribute 20, 1
@@ -1546,13 +1551,8 @@
; EXYNOS-M1: .eabi_attribute 23, 3
; EXYNOS-M1: .eabi_attribute 24, 1
; EXYNOS-M1: .eabi_attribute 25, 1
-; EXYNOS-M1-NOT: .eabi_attribute 27
; EXYNOS-M1-NOT: .eabi_attribute 28
-; EXYNOS-M1: .eabi_attribute 36, 1
; EXYNOS-M1: .eabi_attribute 38, 1
-; EXYNOS-M1: .eabi_attribute 42, 1
-; EXYNOS-M1-NOT: .eabi_attribute 44
-; EXYNOS-M1: .eabi_attribute 68, 3
; EXYNOS-M1-FAST-NOT: .eabi_attribute 19
;; The exynos-m1 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1568,6 +1568,11 @@
; EXYNOS-M2: .eabi_attribute 9, 2
; EXYNOS-M2: .fpu crypto-neon-fp-armv8
; EXYNOS-M2: .eabi_attribute 12, 3
+; EXYNOS-M2-NOT: .eabi_attribute 27
+; EXYNOS-M2: .eabi_attribute 36, 1
+; EXYNOS-M2: .eabi_attribute 42, 1
+; EXYNOS-M2-NOT: .eabi_attribute 44
+; EXYNOS-M2: .eabi_attribute 68, 3
; EXYNOS-M2-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; EXYNOS-M2: .eabi_attribute 20, 1
@@ -1576,13 +1581,8 @@
; EXYNOS-M2: .eabi_attribute 23, 3
; EXYNOS-M2: .eabi_attribute 24, 1
; EXYNOS-M2: .eabi_attribute 25, 1
-; EXYNOS-M2-NOT: .eabi_attribute 27
; EXYNOS-M2-NOT: .eabi_attribute 28
-; EXYNOS-M2: .eabi_attribute 36, 1
; EXYNOS-M2: .eabi_attribute 38, 1
-; EXYNOS-M2: .eabi_attribute 42, 1
-; EXYNOS-M2-NOT: .eabi_attribute 44
-; EXYNOS-M2: .eabi_attribute 68, 3
; EXYNOS-M3: .cpu exynos-m3
; EXYNOS-M3: .eabi_attribute 6, 14
@@ -1591,6 +1591,11 @@
; EXYNOS-M3: .eabi_attribute 9, 2
; EXYNOS-M3: .fpu crypto-neon-fp-armv8
; EXYNOS-M3: .eabi_attribute 12, 3
+; EXYNOS-M3-NOT: .eabi_attribute 27
+; EXYNOS-M3: .eabi_attribute 36, 1
+; EXYNOS-M3: .eabi_attribute 42, 1
+; EXYNOS-M3-NOT: .eabi_attribute 44
+; EXYNOS-M3: .eabi_attribute 68, 3
; EXYNOS-M3-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; EXYNOS-M3: .eabi_attribute 20, 1
@@ -1599,13 +1604,8 @@
; EXYNOS-M3: .eabi_attribute 23, 3
; EXYNOS-M3: .eabi_attribute 24, 1
; EXYNOS-M3: .eabi_attribute 25, 1
-; EXYNOS-M3-NOT: .eabi_attribute 27
; EXYNOS-M3-NOT: .eabi_attribute 28
-; EXYNOS-M3: .eabi_attribute 36, 1
; EXYNOS-M3: .eabi_attribute 38, 1
-; EXYNOS-M3: .eabi_attribute 42, 1
-; EXYNOS-M3-NOT: .eabi_attribute 44
-; EXYNOS-M3: .eabi_attribute 68, 3
; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16
; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16
@@ -1619,6 +1619,11 @@
; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2
; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8
; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27
+; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44
+; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3
; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1
@@ -1627,13 +1632,8 @@
; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3
; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1
; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1
-; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27
; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28
-; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1
; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1
-; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1
-; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44
-; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3
; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19
;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1670,23 +1670,16 @@
; ARMv8R-SP-NOT: .eabi_attribute 12
; ARMv8R-NEON: .fpu neon-fp-armv8
; ARMv8R-NEON: .eabi_attribute 12, 3 @ Tag_Advanced_SIMD_arch
-; ARMv8R: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use
-; ARMv8R: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal
-; ARMv8R: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions
-; ARMv8R: .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model
-; ARMv8R: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
-; ARMv8R: .eabi_attribute 24, 1 @ Tag_ABI_align_needed
-; ARMv8R: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved
; ARMv8R-NOFPU-NOT: .eabi_attribute 27
; ARMv8R-SP: .eabi_attribute 27, 1 @ Tag_ABI_HardFP_use
; ARMv8R-NEON-NOT: .eabi_attribute 27
; ARMv8R-NOFPU-NOT: .eabi_attribute 36
; ARMv8R-SP: .eabi_attribute 36, 1 @ Tag_FP_HP_extension
; ARMv8R-NEON: .eabi_attribute 36, 1 @ Tag_FP_HP_extension
-; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
; ARMv8R: .eabi_attribute 42, 1 @ Tag_MPextension_use
-; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use
; ARMv8R: .eabi_attribute 68, 2 @ Tag_Virtualization_use
+; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
+; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use
define i32 @f(i64 %z) {
ret i32 0
diff --git a/test/CodeGen/ARM/darwin-tls-preserved.ll b/test/CodeGen/ARM/darwin-tls-preserved.ll
new file mode 100644
index 0000000000000..4969fabfd9b3c
--- /dev/null
+++ b/test/CodeGen/ARM/darwin-tls-preserved.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -arm-atomic-cfg-tidy=0 -o - %s | FileCheck %s
+
+@tls_var = thread_local global i32 0
+
+; r9 and r12 can be live across the asm, but those get clobbered by the TLS
+; access (in a different BB to order it).
+define i32 @test_regs_preserved(i32* %ptr1, i32* %ptr2, i1 %tst1) {
+; CHECK-LABEL: test_regs_preserved:
+; CHECK: str {{.*}}, [sp
+; CHECK: mov {{.*}}, r12
+entry:
+ call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r10},~{r11},~{r13},~{lr}"()
+ br i1 %tst1, label %get_tls, label %done
+
+get_tls:
+ %val = load i32, i32* @tls_var
+ br label %done
+
+done:
+ %res = phi i32 [%val, %get_tls], [0, %entry]
+ store i32 42, i32* %ptr1
+ store i32 42, i32* %ptr2
+ ret i32 %res
+}
diff --git a/test/CodeGen/ARM/divmod-hwdiv.ll b/test/CodeGen/ARM/divmod-hwdiv.ll
new file mode 100644
index 0000000000000..4cc316ffa3ea6
--- /dev/null
+++ b/test/CodeGen/ARM/divmod-hwdiv.ll
@@ -0,0 +1,37 @@
+; The hwdiv subtarget feature should only influence thumb, not arm.
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,THUMB-HWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+
+; The hwdiv-arm subtarget feature should only influence arm, not thumb.
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,ARM-HWDIV
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+
+define arm_aapcscc i32 @test_i32_srem(i32 %x, i32 %y) {
+; ALL-LABEL: test_i32_srem:
+; ARM-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
+; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
+; ARM-HWDIV: sub r0, r0, [[P]]
+; THUMB-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
+; THUMB-HWDIV: mls r0, [[Q]], r1, r0
+; AEABI-NOHWDIV: bl __aeabi_idivmod
+; AEABI-NOHWDIV: mov r0, r1
+ %r = srem i32 %x, %y
+ ret i32 %r
+}
+
+define arm_aapcscc i32 @test_i32_urem(i32 %x, i32 %y) {
+; ALL-LABEL: test_i32_urem:
+; ARM-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
+; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
+; ARM-HWDIV: sub r0, r0, [[P]]
+; THUMB-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
+; THUMB-HWDIV: mls r0, [[Q]], r1, r0
+; AEABI-NOHWDIV: bl __aeabi_uidivmod
+; AEABI-NOHWDIV: mov r0, r1
+ %r = urem i32 %x, %y
+ ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir
new file mode 100644
index 0000000000000..9c6cd931b1532
--- /dev/null
+++ b/test/CodeGen/ARM/fpoffset_overflow.mir
@@ -0,0 +1,94 @@
+# RUN: llc -o - %s -mtriple=thumbv7-- -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s
+---
+# This should trigger an emergency spill in the register scavenger because the
+# frame offset into the large argument is too large.
+# CHECK-LABEL: name: func0
+# CHECK: t2STRi12 killed %r7, %sp, 0, 14, _ :: (store 4 into %stack.0)
+# CHECK: %r7 = t2ADDri killed %sp, 4096, 14, _, _
+# CHECK: %r11 = t2LDRi12 killed %r7, 36, 14, _ :: (load 4)
+# CHECK: %r7 = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0)
+name: func0
+tracksRegLiveness: true
+fixedStack:
+ - { id: 0, offset: 4084, size: 4, alignment: 4, isImmutable: true,
+ isAliased: false }
+ - { id: 1, offset: -12, size: 4096, alignment: 4, isImmutable: false,
+ isAliased: false }
+body: |
+ bb.0:
+ %r0 = IMPLICIT_DEF
+ %r1 = IMPLICIT_DEF
+ %r2 = IMPLICIT_DEF
+ %r3 = IMPLICIT_DEF
+ %r4 = IMPLICIT_DEF
+ %r5 = IMPLICIT_DEF
+ %r6 = IMPLICIT_DEF
+ %r8 = IMPLICIT_DEF
+ %r9 = IMPLICIT_DEF
+ %r10 = IMPLICIT_DEF
+ %r11 = IMPLICIT_DEF
+ %r12 = IMPLICIT_DEF
+ %lr = IMPLICIT_DEF
+
+ %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4)
+
+ KILL %r0
+ KILL %r1
+ KILL %r2
+ KILL %r3
+ KILL %r4
+ KILL %r5
+ KILL %r6
+ KILL %r8
+ KILL %r9
+ KILL %r10
+ KILL %r11
+ KILL %r12
+ KILL %lr
+...
+---
+# This should not trigger an emergency spill yet.
+# CHECK-LABEL: name: func1
+# CHECK-NOT: t2STRi12
+# CHECK-NOT: t2ADDri
+# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, _ :: (load 4)
+# CHECK-NOT: t2LDRi12
+name: func1
+tracksRegLiveness: true
+fixedStack:
+ - { id: 0, offset: 4044, size: 4, alignment: 4, isImmutable: true,
+ isAliased: false }
+ - { id: 1, offset: -12, size: 4056, alignment: 4, isImmutable: false,
+ isAliased: false }
+body: |
+ bb.0:
+ %r0 = IMPLICIT_DEF
+ %r1 = IMPLICIT_DEF
+ %r2 = IMPLICIT_DEF
+ %r3 = IMPLICIT_DEF
+ %r4 = IMPLICIT_DEF
+ %r5 = IMPLICIT_DEF
+ %r6 = IMPLICIT_DEF
+ %r8 = IMPLICIT_DEF
+ %r9 = IMPLICIT_DEF
+ %r10 = IMPLICIT_DEF
+ %r11 = IMPLICIT_DEF
+ %r12 = IMPLICIT_DEF
+ %lr = IMPLICIT_DEF
+
+ %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4)
+
+ KILL %r0
+ KILL %r1
+ KILL %r2
+ KILL %r3
+ KILL %r4
+ KILL %r5
+ KILL %r6
+ KILL %r8
+ KILL %r9
+ KILL %r10
+ KILL %r11
+ KILL %r12
+ KILL %lr
+...
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index d874884dcb393..fb204debf6127 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -30,10 +30,9 @@ entry:
define void @t1(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t1:
-; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #15
-; CHECK: adds r1, #15
+; CHECK: movs [[INC:r[0-9]+]], #15
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]]
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
@@ -43,13 +42,15 @@ entry:
define void @t2(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t2:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: add.w r3, r0, #16
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]]
; CHECK: movw [[REG2:r[0-9]+]], #16716
; CHECK: movt [[REG2:r[0-9]+]], #72
-; CHECK: str [[REG2]], [r0, #32]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
+; CHECK: str [[REG2]], [r0]
; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
index f6f8d5623509e..b86874692acad 100644
--- a/test/CodeGen/ARM/memset-inline.ll
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -13,10 +13,10 @@ entry:
define void @t2() nounwind ssp {
entry:
; CHECK-LABEL: t2:
-; CHECK: add.w r1, r0, #10
; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
-; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: movs r1, #10
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1
+; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2]
%buf = alloca [26 x i8], align 1
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index db9bc6ccdd0c8..0a7f7698fa88c 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_andi8:
-;CHECK: vand
+; CHECK-LABEL: v_andi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = and <8 x i8> %tmp1, %tmp2
@@ -10,8 +16,13 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_andi16:
-;CHECK: vand
+; CHECK-LABEL: v_andi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = and <4 x i16> %tmp1, %tmp2
@@ -19,8 +30,13 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_andi32:
-;CHECK: vand
+; CHECK-LABEL: v_andi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = and <2 x i32> %tmp1, %tmp2
@@ -28,8 +44,13 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_andi64:
-;CHECK: vand
+; CHECK-LABEL: v_andi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = and <1 x i64> %tmp1, %tmp2
@@ -37,8 +58,14 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_andQi8:
-;CHECK: vand
+; CHECK-LABEL: v_andQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vand q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = and <16 x i8> %tmp1, %tmp2
@@ -46,8 +73,14 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_andQi16:
-;CHECK: vand
+; CHECK-LABEL: v_andQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vand q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = and <8 x i16> %tmp1, %tmp2
@@ -55,8 +88,14 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_andQi32:
-;CHECK: vand
+; CHECK-LABEL: v_andQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vand q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = and <4 x i32> %tmp1, %tmp2
@@ -64,8 +103,14 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_andQi64:
-;CHECK: vand
+; CHECK-LABEL: v_andQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vand q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = and <2 x i64> %tmp1, %tmp2
@@ -73,8 +118,13 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_bici8:
-;CHECK: vbic
+; CHECK-LABEL: v_bici8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vbic d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -83,8 +133,13 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_bici16:
-;CHECK: vbic
+; CHECK-LABEL: v_bici16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vbic d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -93,8 +148,13 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_bici32:
-;CHECK: vbic
+; CHECK-LABEL: v_bici32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vbic d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
@@ -103,8 +163,13 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_bici64:
-;CHECK: vbic
+; CHECK-LABEL: v_bici64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vbic d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
@@ -113,8 +178,14 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_bicQi8:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vbic q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -123,8 +194,14 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_bicQi16:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vbic q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -133,8 +210,14 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_bicQi32:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vbic q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -143,8 +226,14 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_bicQi64:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vbic q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
@@ -153,8 +242,13 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_eori8:
-;CHECK: veor
+; CHECK-LABEL: v_eori8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp1, %tmp2
@@ -162,8 +256,13 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_eori16:
-;CHECK: veor
+; CHECK-LABEL: v_eori16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp1, %tmp2
@@ -171,8 +270,13 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_eori32:
-;CHECK: veor
+; CHECK-LABEL: v_eori32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp1, %tmp2
@@ -180,8 +284,13 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_eori64:
-;CHECK: veor
+; CHECK-LABEL: v_eori64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp1, %tmp2
@@ -189,8 +298,14 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_eorQi8:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: veor q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp1, %tmp2
@@ -198,8 +313,14 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_eorQi16:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: veor q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp1, %tmp2
@@ -207,8 +328,14 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_eorQi32:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: veor q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp1, %tmp2
@@ -216,8 +343,14 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_eorQi64:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: veor q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp1, %tmp2
@@ -225,72 +358,113 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: v_mvni8:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmvn d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
ret <8 x i8> %tmp2
}
define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: v_mvni16:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmvn d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
ret <4 x i16> %tmp2
}
define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: v_mvni32:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmvn d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
ret <2 x i32> %tmp2
}
define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
-;CHECK-LABEL: v_mvni64:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmvn d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
ret <1 x i64> %tmp2
}
define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi8:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vmvn q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
ret <16 x i8> %tmp2
}
define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi16:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vmvn q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
ret <8 x i16> %tmp2
}
define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi32:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vmvn q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
ret <4 x i32> %tmp2
}
define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi64:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vmvn q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
ret <2 x i64> %tmp2
}
define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orri8:
-;CHECK: vorr
+; CHECK-LABEL: v_orri8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorr d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = or <8 x i8> %tmp1, %tmp2
@@ -298,8 +472,13 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orri16:
-;CHECK: vorr
+; CHECK-LABEL: v_orri16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorr d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = or <4 x i16> %tmp1, %tmp2
@@ -307,8 +486,13 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orri32:
-;CHECK: vorr
+; CHECK-LABEL: v_orri32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorr d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = or <2 x i32> %tmp1, %tmp2
@@ -316,8 +500,13 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orri64:
-;CHECK: vorr
+; CHECK-LABEL: v_orri64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorr d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = or <1 x i64> %tmp1, %tmp2
@@ -325,8 +514,14 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orrQi8:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorr q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = or <16 x i8> %tmp1, %tmp2
@@ -334,8 +529,14 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orrQi16:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorr q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = or <8 x i16> %tmp1, %tmp2
@@ -343,8 +544,14 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orrQi32:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorr q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = or <4 x i32> %tmp1, %tmp2
@@ -352,8 +559,14 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orrQi64:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorr q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = or <2 x i64> %tmp1, %tmp2
@@ -361,8 +574,13 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orni8:
-;CHECK: vorn
+; CHECK-LABEL: v_orni8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorn d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -371,8 +589,13 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orni16:
-;CHECK: vorn
+; CHECK-LABEL: v_orni16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorn d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -381,8 +604,13 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orni32:
-;CHECK: vorn
+; CHECK-LABEL: v_orni32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorn d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
@@ -391,8 +619,13 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orni64:
-;CHECK: vorn
+; CHECK-LABEL: v_orni64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vorn d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
@@ -401,8 +634,14 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_ornQi8:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorn q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -411,8 +650,14 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_ornQi16:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorn q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -421,8 +666,14 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_ornQi32:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorn q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -431,8 +682,14 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_ornQi64:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi64:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vorn q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
@@ -441,8 +698,13 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vtsti8:
-;CHECK: vtst.8
+; CHECK-LABEL: vtsti8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtst.8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = and <8 x i8> %tmp1, %tmp2
@@ -452,8 +714,13 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vtsti16:
-;CHECK: vtst.16
+; CHECK-LABEL: vtsti16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtst.16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = and <4 x i16> %tmp1, %tmp2
@@ -463,8 +730,13 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vtsti32:
-;CHECK: vtst.32
+; CHECK-LABEL: vtsti32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vtst.32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = and <2 x i32> %tmp1, %tmp2
@@ -474,8 +746,14 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vtstQi8:
-;CHECK: vtst.8
+; CHECK-LABEL: vtstQi8:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vtst.8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = and <16 x i8> %tmp1, %tmp2
@@ -485,8 +763,14 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vtstQi16:
-;CHECK: vtst.16
+; CHECK-LABEL: vtstQi16:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vtst.16 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = and <8 x i16> %tmp1, %tmp2
@@ -496,8 +780,14 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vtstQi32:
-;CHECK: vtst.32
+; CHECK-LABEL: vtstQi32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vtst.32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = and <4 x i32> %tmp1, %tmp2
@@ -508,19 +798,24 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
; CHECK-LABEL: v_orrimm:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vorr
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vorr.i32 d16, #0x1000000
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <8 x i8> %tmp3
}
define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
-; CHECK: v_orrimmQ
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vorr
+; CHECK-LABEL: v_orrimmQ:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vorr.i32 q8, #0x1000000
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <16 x i8> %tmp3
@@ -528,9 +823,11 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
; CHECK-LABEL: v_bicimm:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vbic
+; CHECK: @ BB#0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vbic.i32 d16, #0xff000000
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <8 x i8> %tmp3
@@ -538,10 +835,29 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
; CHECK-LABEL: v_bicimmQ:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vbic
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vbic.i32 q8, #0xff000000
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <16 x i8> %tmp3
}
+
+define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: hidden_not_v4i32:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: vmov.i32 q8, #0x6
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vbic q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
+ %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
+ %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
+ ret <4 x i32> %and
+}
+
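Note on the hidden_not_v4i32 test added above: it exercises the combine that rewrites (x ^ C1) & C2 as ~x & C2 whenever C2's set bits are a subset of C1's, which is why the CHECK lines materialise #0x6 and select vbic (AND with complement) rather than a veor/vand pair. A minimal C sketch of that identity, using only the constants from the test (the loop bound is just a spot check, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      /* 6's set bits (0b0110) are a subset of 15's (0b1111), so xor-ing with
         15 flips exactly the bits the mask keeps: (x ^ 15) & 6 == ~x & 6,
         which is the vbic q8, q8, q9 (6 & ~x) in the CHECK lines above. */
      for (uint32_t x = 0; x < (1u << 16); ++x)
        assert(((x ^ 15u) & 6u) == (~x & 6u));
      return 0;
    }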
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll
index ed734723a86d3..4f7ebc938d4c7 100644
--- a/test/CodeGen/ARM/vector-load.ll
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -253,11 +253,22 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
}
; CHECK-LABEL: test_silly_load:
-; CHECK: ldr {{r[0-9]+}}, [r0, #24]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]!
-; CHECK: vldr d{{[0-9]+}}, [r0]
+; CHECK: vldr d{{[0-9]+}}, [r0, #16]
+; CHECK: movs r1, #24
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1
+; CHECK: ldr {{r[0-9]+}}, [r0]
define void @test_silly_load(<28 x i8>* %addr) {
load volatile <28 x i8>, <28 x i8>* %addr
ret void
}
+
+define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) {
+; CHECK-LABEL: test_vld1_immoffset:
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]]
+ %val = load <4 x i32>, <4 x i32>* %ptr.in
+ store <4 x i32> %val, <4 x i32>* %ptr.out
+ %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2
+ ret <4 x i32>* %next
+}
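Note on test_vld1_immoffset: the returned pointer is advanced by two vectors (32 bytes) while only one vector (16 bytes) is transferred, so the writeback form that bumps by the bytes transferred cannot encode the stride and it is materialised in a register instead, matching the movs #32 / vld1.32 ..., [r0], [[INC]] checks. A rough C paraphrase of the IR above, as an assumption-level sketch (GCC/Clang vector extension, not part of the patch):

    #include <stdint.h>

    typedef uint32_t v4i32 __attribute__((vector_size(16)));

    /* Load one 16-byte vector, store it, and return the input pointer
       advanced by 32 bytes; the 32-byte bump differs from the 16 bytes
       loaded, hence the register post-increment in the generated code. */
    v4i32 *test_vld1_immoffset(v4i32 *ptr_in, v4i32 *ptr_out) {
      *ptr_out = *ptr_in;
      return ptr_in + 2;
    }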
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
index 161bbf1d0fde8..e8c1a78a9113b 100644
--- a/test/CodeGen/ARM/vector-store.ll
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -256,3 +256,13 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val)
store <4 x i8>* %inc, <4 x i8>** %ptr
ret void
}
+
+define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) {
+; CHECK-LABEL: test_vst1_1reg:
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]]
+ %val = load <4 x i32>, <4 x i32>* %ptr.in
+ store <4 x i32> %val, <4 x i32>* %ptr.out
+ %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2
+ ret <4 x i32>* %next
+}
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index c6d5747f35093..71ca0f7915242 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -310,6 +310,23 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp5
}
+define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
+;CHECK-LABEL: vld2dupi16_odd_update:
+;CHECK: mov [[INC:r[0-9]+]], #6
+;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]]
+ %A = load i16*, i16** %ptr
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = add <4 x i16> %tmp2, %tmp4
+ %tmp6 = getelementptr i16, i16* %A, i32 3
+ store i16* %tmp6, i16** %ptr
+ ret <4 x i16> %tmp5
+}
+
define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 2c14bc2d8f4eb..866641f3fbbd9 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -150,6 +150,22 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp5
}
+define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: vld2lanei32_odd_update:
+;CHECK: mov [[INC:r[0-9]+]], #12
+;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]]
+ %A = load i32*, i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ %tmp6 = getelementptr i32, i32* %A, i32 3
+ store i32* %tmp6, i32** %ptr
+ ret <2 x i32> %tmp5
+}
+
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld2lanef:
;CHECK: vld2.32
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
index e4dd572a41b4d..2e0718877e96d 100644
--- a/test/CodeGen/ARM/vtbl.ll
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -verify-machineinstrs | FileCheck %s
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
diff --git a/test/CodeGen/AVR/alloca.ll b/test/CodeGen/AVR/alloca.ll
index 579573c0a133d..37c0e62b55fde 100644
--- a/test/CodeGen/AVR/alloca.ll
+++ b/test/CodeGen/AVR/alloca.ll
@@ -45,14 +45,14 @@ entry:
define i16 @alloca_write(i16 %x) {
entry:
; CHECK-LABEL: alloca_write:
+; Small offset here
+; CHECK: std Y+23, {{.*}}
+; CHECK: std Y+24, {{.*}}
; Big offset here
; CHECK: adiw r28, 57
; CHECK: std Y+62, {{.*}}
; CHECK: std Y+63, {{.*}}
; CHECK: sbiw r28, 57
-; Small offset here
-; CHECK: std Y+23, {{.*}}
-; CHECK: std Y+24, {{.*}}
%p = alloca [15 x i16]
%k = alloca [14 x i16]
%arrayidx = getelementptr inbounds [15 x i16], [15 x i16]* %p, i16 0, i16 45
diff --git a/test/CodeGen/AVR/call.ll b/test/CodeGen/AVR/call.ll
index 58bffd3a67870..bc6cb198a9e5b 100644
--- a/test/CodeGen/AVR/call.ll
+++ b/test/CodeGen/AVR/call.ll
@@ -30,9 +30,9 @@ define i8 @calli8_reg() {
define i8 @calli8_stack() {
; CHECK-LABEL: calli8_stack:
-; CHECK: ldi [[REG1:r[0-9]+]], 11
+; CHECK: ldi [[REG1:r[0-9]+]], 10
; CHECK: push [[REG1]]
-; CHECK: ldi [[REG1]], 10
+; CHECK: ldi [[REG1]], 11
; CHECK: push [[REG1]]
; CHECK: call foo8_3
%result1 = call i8 @foo8_3(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11)
@@ -52,14 +52,14 @@ define i16 @calli16_reg() {
define i16 @calli16_stack() {
; CHECK-LABEL: calli16_stack:
-; CHECK: ldi [[REG1:r[0-9]+]], 10
-; CHECK: ldi [[REG2:r[0-9]+]], 2
-; CHECK: push [[REG2]]
-; CHECK: push [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 9
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: push [[REG2]]
; CHECK: push [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 10
+; CHECK: ldi [[REG2:r[0-9]+]], 2
+; CHECK: push [[REG2]]
+; CHECK: push [[REG1]]
; CHECK: call foo16_2
%result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522)
ret i16 %result1
@@ -82,14 +82,14 @@ define i32 @calli32_reg() {
define i32 @calli32_stack() {
; CHECK-LABEL: calli32_stack:
-; CHECK: ldi [[REG1:r[0-9]+]], 15
-; CHECK: ldi [[REG2:r[0-9]+]], 2
-; CHECK: push [[REG2]]
-; CHECK: push [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 64
; CHECK: ldi [[REG2:r[0-9]+]], 66
; CHECK: push [[REG2]]
; CHECK: push [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 15
+; CHECK: ldi [[REG2:r[0-9]+]], 2
+; CHECK: push [[REG2]]
+; CHECK: push [[REG1]]
; CHECK: call foo32_2
%result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432)
ret i32 %result1
@@ -112,14 +112,15 @@ define i64 @calli64_reg() {
define i64 @calli64_stack() {
; CHECK-LABEL: calli64_stack:
-; CHECK: ldi [[REG1:r[0-9]+]], 31
-; CHECK: ldi [[REG2:r[0-9]+]], 242
-; CHECK: push [[REG2]]
-; CHECK: push [[REG1]]
+
; CHECK: ldi [[REG1:r[0-9]+]], 76
; CHECK: ldi [[REG2:r[0-9]+]], 73
; CHECK: push [[REG2]]
; CHECK: push [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 31
+; CHECK: ldi [[REG2:r[0-9]+]], 242
+; CHECK: push [[REG2]]
+; CHECK: push [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 155
; CHECK: ldi [[REG2:r[0-9]+]], 88
; CHECK: push [[REG2]]
diff --git a/test/CodeGen/AVR/directmem.ll b/test/CodeGen/AVR/directmem.ll
index a97e712ed625e..032263a9d657e 100644
--- a/test/CodeGen/AVR/directmem.ll
+++ b/test/CodeGen/AVR/directmem.ll
@@ -33,10 +33,10 @@ define i8 @global8_load() {
define void @array8_store() {
; CHECK-LABEL: array8_store:
-; CHECK: ldi [[REG1:r[0-9]+]], 1
-; CHECK: sts char.array, [[REG1]]
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: sts char.array+1, [[REG2]]
+; CHECK: ldi [[REG1:r[0-9]+]], 1
+; CHECK: sts char.array, [[REG1]]
; CHECK: ldi [[REG:r[0-9]+]], 3
; CHECK: sts char.array+2, [[REG]]
store i8 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @char.array, i32 0, i64 0)
@@ -83,14 +83,18 @@ define i16 @global16_load() {
define void @array16_store() {
; CHECK-LABEL: array16_store:
-; CHECK: ldi [[REG1:r[0-9]+]], 187
-; CHECK: ldi [[REG2:r[0-9]+]], 170
-; CHECK: sts int.array+1, [[REG2]]
-; CHECK: sts int.array, [[REG1]]
+
; CHECK: ldi [[REG1:r[0-9]+]], 204
; CHECK: ldi [[REG2:r[0-9]+]], 170
; CHECK: sts int.array+3, [[REG2]]
; CHECK: sts int.array+2, [[REG1]]
+
+; CHECK: ldi [[REG1:r[0-9]+]], 187
+; CHECK: ldi [[REG2:r[0-9]+]], 170
+; CHECK: sts int.array+1, [[REG2]]
+; CHECK: sts int.array, [[REG1]]
+
+
; CHECK: ldi [[REG1:r[0-9]+]], 221
; CHECK: ldi [[REG2:r[0-9]+]], 170
; CHECK: sts int.array+5, [[REG2]]
@@ -148,14 +152,6 @@ define i32 @global32_load() {
define void @array32_store() {
; CHECK-LABEL: array32_store:
-; CHECK: ldi [[REG1:r[0-9]+]], 27
-; CHECK: ldi [[REG2:r[0-9]+]], 172
-; CHECK: sts long.array+3, [[REG2]]
-; CHECK: sts long.array+2, [[REG1]]
-; CHECK: ldi [[REG1:r[0-9]+]], 68
-; CHECK: ldi [[REG2:r[0-9]+]], 13
-; CHECK: sts long.array+1, [[REG2]]
-; CHECK: sts long.array, [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 102
; CHECK: ldi [[REG2:r[0-9]+]], 85
; CHECK: sts long.array+7, [[REG2]]
@@ -164,6 +160,14 @@ define void @array32_store() {
; CHECK: ldi [[REG2:r[0-9]+]], 119
; CHECK: sts long.array+5, [[REG2]]
; CHECK: sts long.array+4, [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 27
+; CHECK: ldi [[REG2:r[0-9]+]], 172
+; CHECK: sts long.array+3, [[REG2]]
+; CHECK: sts long.array+2, [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 68
+; CHECK: ldi [[REG2:r[0-9]+]], 13
+; CHECK: sts long.array+1, [[REG2]]
+; CHECK: sts long.array, [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 170
; CHECK: ldi [[REG2:r[0-9]+]], 153
; CHECK: sts long.array+11, [[REG2]]
diff --git a/test/CodeGen/AVR/inline-asm/multibyte.ll b/test/CodeGen/AVR/inline-asm/multibyte.ll
deleted file mode 100644
index a7c8f6e75f0fb..0000000000000
--- a/test/CodeGen/AVR/inline-asm/multibyte.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; RUN: llc < %s -march=avr -no-integrated-as | FileCheck %s
-; XFAIL: *
-
-; Multibyte references
-
-; CHECK-LABEL: multibyte_i16
-define void @multibyte_i16(i16 %a) {
-entry:
-; CHECK: instr r24 r25
- call void asm sideeffect "instr ${0:A} ${0:B}", "r"(i16 %a)
-; CHECK: instr r25 r24
- call void asm sideeffect "instr ${0:B} ${0:A}", "r"(i16 %a)
- ret void
-}
-
-; CHECK-LABEL: multibyte_i32
-define void @multibyte_i32(i32 %a) {
-entry:
-; CHECK: instr r22 r23 r24 r25
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "r"(i32 %a)
-; CHECK: instr r25 r24 r23 r22
- call void asm sideeffect "instr ${0:D} ${0:C} ${0:B} ${0:A}", "r"(i32 %a)
- ret void
-}
-
-; CHECK-LABEL: multibyte_alternative_name
-define void @multibyte_alternative_name(i16* %p) {
-entry:
-; CHECK: instr Z
- call void asm sideeffect "instr ${0:a}", "e" (i16* %p)
- ret void
-}
-
-; CHECK-LABEL: multibyte_a_i32
-define void @multibyte_a_i32() {
-entry:
- %a = alloca i32
- %0 = load i32, i32* %a
-; CHECK: instr r20 r21 r22 r23
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "a"(i32 %0)
- ret void
-}
-
-@c = internal global i32 0
-
-; CHECK-LABEL: multibyte_b_i32
-define void @multibyte_b_i32() {
-entry:
- %0 = load i32, i32* @c
-; CHECK: instr r28 r29 r30 r31
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "b"(i32 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_d_i32
-define void @multibyte_d_i32() {
-entry:
- %a = alloca i32
- %0 = load i32, i32* %a
-; CHECK: instr r18 r19 r24 r25
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "d"(i32 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_e_i32
-define void @multibyte_e_i32() {
-entry:
- %a = alloca i32
- %0 = load i32, i32* %a
-; CHECK: instr r26 r27 r30 r31
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "e"(i32 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_l_i32
-define void @multibyte_l_i32() {
-entry:
- %a = alloca i32
- %0 = load i32, i32* %a
-; CHECK: instr r12 r13 r14 r15
- call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "l"(i32 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_a_i16
-define void @multibyte_a_i16() {
-entry:
- %a = alloca i16
- %0 = load i16, i16* %a
-; CHECK: instr r22 r23
- call void asm sideeffect "instr ${0:A} ${0:B}", "a"(i16 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_b_i16
-define void @multibyte_b_i16() {
-entry:
- %a = alloca i16
- %0 = load i16, i16* %a
-; CHECK: instr r30 r31
- call void asm sideeffect "instr ${0:A} ${0:B}", "b"(i16 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_d_i16
-define void @multibyte_d_i16() {
-entry:
- %a = alloca i16
- %0 = load i16, i16* %a
-; CHECK: instr r24 r25
- call void asm sideeffect "instr ${0:A} ${0:B}", "d"(i16 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_e_i16
-define void @multibyte_e_i16() {
-entry:
- %a = alloca i16
- %0 = load i16, i16* %a
-; CHECK: instr r30 r31
- call void asm sideeffect "instr ${0:A} ${0:B}", "e"(i16 %0)
- ret void
-}
-
-; CHECK-LABEL: multibyte_l_i16
-define void @multibyte_l_i16() {
-entry:
- %a = alloca i16
- %0 = load i16, i16* %a
-; CHECK: instr r14 r15
- call void asm sideeffect "instr ${0:A} ${0:B}", "l"(i16 %0)
- ret void
-}
-
-
diff --git a/test/CodeGen/AVR/varargs.ll b/test/CodeGen/AVR/varargs.ll
index b35ce4c0f7aef..4959f2d880c8b 100644
--- a/test/CodeGen/AVR/varargs.ll
+++ b/test/CodeGen/AVR/varargs.ll
@@ -40,14 +40,14 @@ define i16 @varargs2(i8* nocapture %x, ...) {
declare void @var1223(i16, ...)
define void @varargcall() {
; CHECK-LABEL: varargcall:
-; CHECK: ldi [[REG1:r[0-9]+]], 191
-; CHECK: ldi [[REG2:r[0-9]+]], 223
-; CHECK: push [[REG2]]
-; CHECK: push [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 189
; CHECK: ldi [[REG2:r[0-9]+]], 205
; CHECK: push [[REG2]]
; CHECK: push [[REG1]]
+; CHECK: ldi [[REG1:r[0-9]+]], 191
+; CHECK: ldi [[REG2:r[0-9]+]], 223
+; CHECK: push [[REG2]]
+; CHECK: push [[REG1]]
; CHECK: ldi [[REG1:r[0-9]+]], 205
; CHECK: ldi [[REG2:r[0-9]+]], 171
; CHECK: push [[REG2]]
diff --git a/test/CodeGen/Hexagon/addrmode-globoff.mir b/test/CodeGen/Hexagon/addrmode-globoff.mir
new file mode 100644
index 0000000000000..fb22959751ac8
--- /dev/null
+++ b/test/CodeGen/Hexagon/addrmode-globoff.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=hexagon -run-pass amode-opt %s -o - | FileCheck %s
+
+--- |
+ @g0 = external global [16 x i16], align 8
+ define void @foo() {
+ ret void
+ }
+...
+
+---
+name: foo
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: %r0
+
+ ; Make sure that the offset in @g0 is 8.
+ ; CHECK: S4_storerh_ur killed %r0, 2, @g0 + 8, %r0
+
+ %r1 = A2_tfrsi @g0+4
+ %r2 = S2_addasl_rrri %r1, %r0, 2
+ S2_storerh_io %r2, 4, %r0
+...
+
diff --git a/test/CodeGen/Mips/msa/shift_constant_pool.ll b/test/CodeGen/Mips/msa/shift_constant_pool.ll
new file mode 100644
index 0000000000000..73da33361bfa0
--- /dev/null
+++ b/test/CodeGen/Mips/msa/shift_constant_pool.ll
@@ -0,0 +1,171 @@
+; Test whether the following functions, whose shift-amount vectors contain negative values or values larger
+; than the element bit size, have the results of their operations generated correctly when placed into constant pools
+
+; RUN: llc -march=mips64 -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS64 %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS32 %s
+; RUN: llc -march=mips64el -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS64 %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS32 %s
+
+@llvm_mips_bclr_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_bclr_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> <i32 2147483649, i32 2147483649, i32 7, i32 7>, <4 x i32> <i32 -1, i32 31, i32 2, i32 34>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_bclr_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 1 # 0x1
+; ALL: .4byte 1 # 0x1
+; ALL: .4byte 3 # 0x3
+; ALL: .4byte 3 # 0x3
+; ALL-LABEL: llvm_mips_bclr_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bclr_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bclr_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
+
+
+@llvm_mips_bneg_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_bneg_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> <i32 2147483649, i32 2147483649, i32 7, i32 7>, <4 x i32> <i32 -1, i32 31, i32 2, i32 34>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_bneg_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 1 # 0x1
+; ALL: .4byte 1 # 0x1
+; ALL: .4byte 3 # 0x3
+; ALL: .4byte 3 # 0x3
+; ALL-LABEL: llvm_mips_bneg_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bneg_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bneg_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
+
+
+@llvm_mips_bset_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_bset_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 31, i32 2, i32 34>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_bset_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 2147483648 # 0x80000000
+; ALL: .4byte 2147483648 # 0x80000000
+; ALL: .4byte 4 # 0x4
+; ALL: .4byte 4 # 0x4
+; ALL-LABEL: llvm_mips_bset_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bset_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bset_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
+
+@llvm_mips_sll_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_sll_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 31, i32 2, i32 34>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_sll_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 2147483648 # 0x80000000
+; ALL: .4byte 2147483648 # 0x80000000
+; ALL: .4byte 4 # 0x4
+; ALL: .4byte 4 # 0x4
+; ALL-LABEL: llvm_mips_sll_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_sll_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_sll_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
+
+@llvm_mips_sra_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_sra_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> <i32 -16, i32 16, i32 16, i32 16>, <4 x i32> <i32 2, i32 -30, i32 33, i32 1>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_sra_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 4294967292 # 0xfffffffc
+; ALL: .4byte 4 # 0x4
+; ALL: .4byte 8 # 0x8
+; ALL: .4byte 8 # 0x8
+; ALL-LABEL: llvm_mips_sra_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_sra_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_sra_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
+
+@llvm_mips_srl_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_srl_w_test_const_vec() nounwind {
+entry:
+ %0 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> <i32 -16, i32 16, i32 16, i32 16>, <4 x i32> <i32 2, i32 -30, i32 33, i32 1>)
+ store <4 x i32> %0, <4 x i32>* @llvm_mips_srl_w_test_const_vec_res
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind
+
+; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]:
+; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]:
+; ALL: .4byte 1073741820 # 0x3ffffffc
+; ALL: .4byte 4 # 0x4
+; ALL: .4byte 8 # 0x8
+; ALL: .4byte 8 # 0x8
+; ALL-LABEL: llvm_mips_srl_w_test_const_vec:
+; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]])
+; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]])
+; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_srl_w_test_const_vec_res)($[[R1]])
+; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]])
+; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]])
+; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_srl_w_test_const_vec_res)($[[R1]])
+; ALL: ld.w $w0, 0($[[R2]])
+; ALL: st.w $w0, 0($[[R3]])
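Note on the constant-pool values above: they all follow from the per-element shift/bit amount being taken modulo the element width (32 for the .w variants), e.g. an amount of -1 selects bit 31 and an amount of 34 selects bit 2. A small C sketch reproducing the folded values, assuming only that modulo behaviour (the folded .4byte values in the CHECK lines are consistent with it); the bclr_w/bneg_w/bset_w/sll_w helpers are mine, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t bclr_w(uint32_t v, int32_t amt) { return v & ~(1u << ((uint32_t)amt & 31)); }
    static uint32_t bneg_w(uint32_t v, int32_t amt) { return v ^  (1u << ((uint32_t)amt & 31)); }
    static uint32_t bset_w(uint32_t v, int32_t amt) { return v |  (1u << ((uint32_t)amt & 31)); }
    static uint32_t sll_w (uint32_t v, int32_t amt) { return v << ((uint32_t)amt & 31); }

    int main(void) {
      /* llvm_mips_bclr_w_test_const_vec: <0x80000001,0x80000001,7,7>, <-1,31,2,34> -> 1,1,3,3 */
      assert(bclr_w(2147483649u, -1) == 1u && bclr_w(7u, 34) == 3u);
      /* llvm_mips_bneg_w_test_const_vec: same inputs -> 1,1,3,3 */
      assert(bneg_w(2147483649u, 31) == 1u && bneg_w(7u, 2) == 3u);
      /* llvm_mips_bset_w / llvm_mips_sll_w: -> 0x80000000,0x80000000,4,4 */
      assert(bset_w(0u, -1) == 0x80000000u && bset_w(0u, 34) == 4u);
      assert(sll_w(1u, 31) == 0x80000000u && sll_w(1u, 2) == 4u);
      return 0;
    }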
diff --git a/test/CodeGen/Mips/msa/shift_no_and.ll b/test/CodeGen/Mips/msa/shift_no_and.ll
new file mode 100644
index 0000000000000..c6f90215af9c8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/shift_no_and.ll
@@ -0,0 +1,460 @@
+; Test the absence of the andi.b / and.v instructions
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck %s
+
+@llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bclr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bclr_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_bclr_b_test:
+; CHECK-NOT: andi.b
+; CHECK: bclr.b
+
+@llvm_mips_bclr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bclr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bclr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bclr_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_bclr_h_test:
+; CHECK-NOT: and.v
+; CHECK: bclr.h
+
+@llvm_mips_bclr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bclr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bclr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bclr_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_bclr_w_test:
+; CHECK-NOT: and.v
+; CHECK: bclr.w
+
+@llvm_mips_bclr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bclr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bclr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bclr_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_bclr_d_test:
+; CHECK-NOT: and.v
+; CHECK: bclr.d
+
+@llvm_mips_bneg_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bneg_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bneg_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bneg_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_bneg_b_test:
+; CHECK-NOT: andi.b
+; CHECK: bneg.b
+
+@llvm_mips_bneg_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bneg_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bneg_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bneg_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_bneg_h_test:
+; CHECK-NOT: and.v
+; CHECK: bneg.h
+
+@llvm_mips_bneg_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bneg_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bneg_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bneg_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_bneg_w_test:
+; CHECK-NOT: and.v
+; CHECK: bneg.w
+
+@llvm_mips_bneg_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bneg_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bneg_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bneg_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_bneg_d_test:
+; CHECK-NOT: and.v
+; CHECK: bneg.d
+
+@llvm_mips_bset_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bset_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bset_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bset_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_bset_b_test:
+; CHECK-NOT: andi.b
+; CHECK: bset.b
+
+@llvm_mips_bset_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bset_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bset_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bset_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_bset_h_test:
+; CHECK-NOT: and.v
+; CHECK: bset.h
+
+@llvm_mips_bset_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bset_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bset_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bset_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_bset_w_test:
+; CHECK-NOT: and.v
+; CHECK: bset.w
+
+@llvm_mips_bset_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bset_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bset_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bset_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.bset.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_bset_d_test:
+; CHECK-NOT: and.v
+; CHECK: bset.d
+
+@llvm_mips_sll_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sll_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_sll_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sll_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.sll.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_sll_b_test:
+; CHECK-NOT: andi.b
+; CHECK: sll.b
+
+@llvm_mips_sll_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sll_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_sll_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sll_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.sll.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_sll_h_test:
+; CHECK-NOT: and.v
+; CHECK: sll.h
+
+@llvm_mips_sll_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sll_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_sll_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sll_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_sll_w_test:
+; CHECK-NOT: and.v
+; CHECK: sll.w
+
+@llvm_mips_sll_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sll_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_sll_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sll_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.sll.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_sll_d_test:
+; CHECK-NOT: and.v
+; CHECK: sll.d
+
+@llvm_mips_sra_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sra_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_sra_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sra_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.sra.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_sra_b_test:
+; CHECK-NOT: andi.b
+; CHECK: sra.b
+
+@llvm_mips_sra_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sra_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_sra_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sra_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.sra.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_sra_h_test:
+; CHECK-NOT: and.v
+; CHECK: sra.h
+
+@llvm_mips_sra_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sra_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_sra_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sra_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_sra_w_test:
+; CHECK-NOT: and.v
+; CHECK: sra.w
+
+@llvm_mips_sra_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sra_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_sra_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sra_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.sra.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_sra_d_test:
+; CHECK-NOT: and.v
+; CHECK: sra.d
+
+@llvm_mips_srl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_srl_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srl_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.srl.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_srl_b_test:
+; CHECK-NOT: andi.b
+; CHECK: srl.b
+
+@llvm_mips_srl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_srl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srl_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.srl.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_srl_h_test:
+; CHECK-NOT: and.v
+; CHECK: srl.h
+
+@llvm_mips_srl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_srl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srl_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_srl_w_test:
+; CHECK-NOT: and.v
+; CHECK: srl.w
+
+@llvm_mips_srl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_srl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srl_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.srl.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_srl_d_test:
+; CHECK-NOT: and.v
+; CHECK: srl.d
diff --git a/test/CodeGen/PowerPC/andc.ll b/test/CodeGen/PowerPC/andc.ll
index 6135db510ad53..df47bfc1e38ef 100644
--- a/test/CodeGen/PowerPC/andc.ll
+++ b/test/CodeGen/PowerPC/andc.ll
@@ -1,12 +1,13 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-apple-darwin | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown | FileCheck %s
define i1 @and_cmp1(i32 %x, i32 %y) {
; CHECK-LABEL: and_cmp1:
-; CHECK: andc [[REG1:r[0-9]+]], r4, r3
-; CHECK: cntlzw [[REG2:r[0-9]+]], [[REG1]]
-; CHECK: rlwinm r3, [[REG2]], 27, 31, 31
-; CHECK: blr
-
+; CHECK: # BB#0:
+; CHECK-NEXT: andc 3, 4, 3
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: blr
%and = and i32 %x, %y
%cmp = icmp eq i32 %and, %y
ret i1 %cmp
@@ -14,12 +15,12 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
define i1 @and_cmp_const(i32 %x) {
; CHECK-LABEL: and_cmp_const:
-; CHECK: li [[REG1:r[0-9]+]], 43
-; CHECK: andc [[REG2:r[0-9]+]], [[REG1]], r3
-; CHECK: cntlzw [[REG3:r[0-9]+]], [[REG2]]
-; CHECK: rlwinm r3, [[REG3]], 27, 31, 31
-; CHECK: blr
-
+; CHECK: # BB#0:
+; CHECK-NEXT: li 4, 43
+; CHECK-NEXT: andc 3, 4, 3
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: blr
%and = and i32 %x, 43
%cmp = icmp eq i32 %and, 43
ret i1 %cmp
@@ -27,15 +28,26 @@ define i1 @and_cmp_const(i32 %x) {
define i1 @foo(i32 %i) {
; CHECK-LABEL: foo:
-; CHECK: lis [[REG1:r[0-9]+]], 4660
-; CHECK: ori [[REG2:r[0-9]+]], [[REG1]], 22136
-; CHECK: andc [[REG3:r[0-9]+]], [[REG2]], r3
-; CHECK: cntlzw [[REG4:r[0-9]+]], [[REG3]]
-; CHECK: rlwinm r3, [[REG4]], 27, 31, 31
-; CHECK: blr
-
+; CHECK: # BB#0:
+; CHECK-NEXT: lis 4, 4660
+; CHECK-NEXT: ori 4, 4, 22136
+; CHECK-NEXT: andc 3, 4, 3
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: blr
%and = and i32 %i, 305419896
%cmp = icmp eq i32 %and, 305419896
ret i1 %cmp
}
+define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
+; CHECK-LABEL: hidden_not_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
+; CHECK-NEXT: blr
+ %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
+ %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
+ ret <4 x i32> %and
+}
+
diff --git a/test/CodeGen/WebAssembly/returned.ll b/test/CodeGen/WebAssembly/returned.ll
index b059fd8a59879..dfd3fad794f1e 100644
--- a/test/CodeGen/WebAssembly/returned.ll
+++ b/test/CodeGen/WebAssembly/returned.ll
@@ -47,3 +47,34 @@ define void @test_constant_arg() {
ret void
}
declare i32* @returns_arg(i32* returned)
+
+; Test that the optimization isn't performed on arguments without the
+; "returned" attribute.
+
+; CHECK-LABEL: test_other_skipped:
+; CHECK-NEXT: .param i32, i32, f64{{$}}
+; CHECK-NEXT: {{^}} i32.call $drop=, do_something@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: {{^}} call do_something_with_i32@FUNCTION, $1{{$}}
+; CHECK-NEXT: {{^}} call do_something_with_double@FUNCTION, $2{{$}}
+declare i32 @do_something(i32 returned, i32, double)
+declare void @do_something_with_i32(i32)
+declare void @do_something_with_double(double)
+define void @test_other_skipped(i32 %a, i32 %b, double %c) {
+ %call = call i32 @do_something(i32 %a, i32 %b, double %c)
+ call void @do_something_with_i32(i32 %b)
+ call void @do_something_with_double(double %c)
+ ret void
+}
+
+; Test that the optimization is performed on arguments other than the first.
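+; Here the callee's second parameter is the one marked "returned", so
+; returning %b is equivalent to returning the call result ($pop0) directly.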
+
+; CHECK-LABEL: test_second_arg:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call $push0=, do_something_else@FUNCTION, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare i32 @do_something_else(i32, i32 returned)
+define i32 @test_second_arg(i32 %a, i32 %b) {
+ %call = call i32 @do_something_else(i32 %a, i32 %b)
+ ret i32 %b
+}
diff --git a/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir b/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir
index c4e5fb2d05fc0..8e04239041a87 100644
--- a/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir
+++ b/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir
@@ -106,6 +106,10 @@
ret void
}
+ define void @trunc_check() {
+ ret void
+ }
+
...
---
name: test_add_i8
@@ -632,3 +636,27 @@ body: |
RET 0
...
+---
+name: trunc_check
+alignment: 4
+legalized: true
+# CHECK-LABEL: name: trunc_check
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 3, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+body: |
+ bb.0 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %1(s1) = G_TRUNC %0(s32)
+ %2(s8) = G_TRUNC %0(s32)
+ %3(s16) = G_TRUNC %0(s32)
+ RET 0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/binop-isel.ll b/test/CodeGen/X86/GlobalISel/binop.ll
index 8499dd958447b..8499dd958447b 100644
--- a/test/CodeGen/X86/GlobalISel/binop-isel.ll
+++ b/test/CodeGen/X86/GlobalISel/binop.ll
diff --git a/test/CodeGen/X86/GlobalISel/legalize-const.mir b/test/CodeGen/X86/GlobalISel/legalize-constant.mir
index 612d33a77fc96..612d33a77fc96 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-const.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-constant.mir
diff --git a/test/CodeGen/X86/GlobalISel/legalize-trunc.mir b/test/CodeGen/X86/GlobalISel/legalize-trunc.mir
new file mode 100644
index 0000000000000..6b390d990ecfd
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-trunc.mir
@@ -0,0 +1,31 @@
+# RUN: llc -mtriple=i386-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+--- |
+ define void @trunc_check() {
+ ret void
+ }
+
+...
+---
+name: trunc_check
+# ALL-LABEL: name: trunc_check
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ ; ALL: %1(s1) = G_TRUNC %0(s32)
+ %1(s1) = G_TRUNC %0(s32)
+
+ ; ALL: %2(s8) = G_TRUNC %0(s32)
+ %2(s8) = G_TRUNC %0(s32)
+
+ ; ALL: %3(s16) = G_TRUNC %0(s32)
+ %3(s16) = G_TRUNC %0(s32)
+ RET 0
+
+...
+
diff --git a/test/CodeGen/X86/GlobalISel/memop-isel.ll b/test/CodeGen/X86/GlobalISel/memop.ll
index 6fe66436e4a8a..6fe66436e4a8a 100644
--- a/test/CodeGen/X86/GlobalISel/memop-isel.ll
+++ b/test/CodeGen/X86/GlobalISel/memop.ll
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
new file mode 100644
index 0000000000000..27fcc223d2bbe
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -0,0 +1,226 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+
+--- |
+ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
+ %ret = add i64 %arg1, %arg2
+ ret i64 %ret
+ }
+
+ define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
+ %ret = add i32 %arg1, %arg2
+ ret i32 %ret
+ }
+
+ define float @test_add_float(float %arg1, float %arg2) {
+ %ret = fadd float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_add_double(double %arg1, double %arg2) {
+ %ret = fadd double %arg1, %arg2
+ ret double %ret
+ }
+
+ define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %ret = add <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+ }
+
+ define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
+ %ret = fadd <4 x float> %arg1, %arg2
+ ret <4 x float> %ret
+ }
+...
+
+---
+name: test_add_i64
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr64 }
+# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 2, class: gr64 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL-NEXT: %1 = COPY %rsi
+# ALL-NEXT: %2 = ADD64rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s64) = COPY %rdi
+ %1(s64) = COPY %rsi
+ %2(s64) = G_ADD %0, %1
+ %rax = COPY %2(s64)
+
+...
+
+---
+name: test_add_i32
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr32 }
+# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 2, class: gr32 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %edi
+# ALL-NEXT: %1 = COPY %esi
+# ALL-NEXT: %2 = ADD32rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s32) = COPY %edi
+ %1(s32) = COPY %esi
+ %2(s32) = G_ADD %0, %1
+ %rax = COPY %2(s32)
+
+...
+---
+name: test_add_float
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = ADDSSrr %0, %1
+# AVX-NEXT: %2 = VADDSSrr %0, %1
+# AVX512F-NEXT: %2 = VADDSSZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FADD %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_double
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = ADDSDrr %0, %1
+# AVX-NEXT: %2 = VADDSDrr %0, %1
+# AVX512F-NEXT: %2 = VADDSDZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FADD %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = PADDDrr %0, %1
+# AVX-NEXT: %2 = VPADDDrr %0, %1
+# AVX512F-NEXT: %2 = VPADDDrr %0, %1
+# AVX512VL-NEXT: %2 = VPADDDZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_ADD %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_v4f32
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = ADDPSrr %0, %1
+# AVX-NEXT: %2 = VADDPSrr %0, %1
+# AVX512F-NEXT: %2 = VADDPSrr %0, %1
+# AVX512VL-NEXT: %2 = VADDPSZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_FADD %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/frameIndex-instructionselect.mir b/test/CodeGen/X86/GlobalISel/select-frameIndex.mir
index 2fa9ac23a7afa..2fa9ac23a7afa 100644
--- a/test/CodeGen/X86/GlobalISel/frameIndex-instructionselect.mir
+++ b/test/CodeGen/X86/GlobalISel/select-frameIndex.mir
diff --git a/test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir b/test/CodeGen/X86/GlobalISel/select-memop.mir
index 17522c3cb45eb..943c9aceb4d19 100644
--- a/test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop.mir
@@ -4,67 +4,7 @@
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
--- |
- define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
- %ret = add i64 %arg1, %arg2
- ret i64 %ret
- }
-
- define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
- %ret = add i32 %arg1, %arg2
- ret i32 %ret
- }
-
- define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
- %ret = sub i64 %arg1, %arg2
- ret i64 %ret
- }
-
- define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
- %ret = sub i32 %arg1, %arg2
- ret i32 %ret
- }
-
- define float @test_add_float(float %arg1, float %arg2) {
- %ret = fadd float %arg1, %arg2
- ret float %ret
- }
-
- define double @test_add_double(double %arg1, double %arg2) {
- %ret = fadd double %arg1, %arg2
- ret double %ret
- }
-
- define float @test_sub_float(float %arg1, float %arg2) {
- %ret = fsub float %arg1, %arg2
- ret float %ret
- }
-
- define double @test_sub_double(double %arg1, double %arg2) {
- %ret = fsub double %arg1, %arg2
- ret double %ret
- }
-
- define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
- %ret = add <4 x i32> %arg1, %arg2
- ret <4 x i32> %ret
- }
-
- define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
- %ret = sub <4 x i32> %arg1, %arg2
- ret <4 x i32> %ret
- }
-
- define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
- %ret = fadd <4 x float> %arg1, %arg2
- ret <4 x float> %ret
- }
-
- define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
- %ret = fsub <4 x float> %arg1, %arg2
- ret <4 x float> %ret
- }
-
- define i8 @test_load_i8(i8* %p1) {
+ define i8 @test_load_i8(i8* %p1) {
%r = load i8, i8* %p1
ret i8 %r
}
@@ -88,12 +28,12 @@
%r = load float, float* %p1
ret float %r
}
-
+
define float @test_load_float_vecreg(float* %p1) {
%r = load float, float* %p1
ret float %r
}
-
+
define double @test_load_double(double* %p1) {
%r = load double, double* %p1
@@ -139,7 +79,7 @@
store double %val, double* %p1
ret double* %p1
}
-
+
define double* @test_store_double_vec(double %val, double* %p1) {
store double %val, double* %p1
ret double* %p1
@@ -156,386 +96,6 @@
}
...
-
----
-name: test_add_i64
-legalized: true
-regBankSelected: true
-# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
-registers:
- - { id: 0, class: gpr }
- - { id: 1, class: gpr }
- - { id: 2, class: gpr }
-# ALL: %0 = COPY %rdi
-# ALL-NEXT: %1 = COPY %rsi
-# ALL-NEXT: %2 = ADD64rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %edi, %esi
-
- %0(s64) = COPY %rdi
- %1(s64) = COPY %rsi
- %2(s64) = G_ADD %0, %1
- %rax = COPY %2(s64)
-
-...
-
----
-name: test_add_i32
-legalized: true
-regBankSelected: true
-# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-registers:
- - { id: 0, class: gpr }
- - { id: 1, class: gpr }
- - { id: 2, class: gpr }
-# ALL: %0 = COPY %edi
-# ALL-NEXT: %1 = COPY %esi
-# ALL-NEXT: %2 = ADD32rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %edi, %esi
-
- %0(s32) = COPY %edi
- %1(s32) = COPY %esi
- %2(s32) = G_ADD %0, %1
- %rax = COPY %2(s32)
-
-...
-
----
-name: test_sub_i64
-legalized: true
-regBankSelected: true
-# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
-registers:
- - { id: 0, class: gpr }
- - { id: 1, class: gpr }
- - { id: 2, class: gpr }
-# ALL: %0 = COPY %rdi
-# ALL-NEXT: %1 = COPY %rsi
-# ALL-NEXT: %2 = SUB64rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %edi, %esi
-
- %0(s64) = COPY %rdi
- %1(s64) = COPY %rsi
- %2(s64) = G_SUB %0, %1
- %rax = COPY %2(s64)
-
-...
-
----
-name: test_sub_i32
-legalized: true
-regBankSelected: true
-# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-registers:
- - { id: 0, class: gpr }
- - { id: 1, class: gpr }
- - { id: 2, class: gpr }
-# ALL: %0 = COPY %edi
-# ALL-NEXT: %1 = COPY %esi
-# ALL-NEXT: %2 = SUB32rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %edi, %esi
-
- %0(s32) = COPY %edi
- %1(s32) = COPY %esi
- %2(s32) = G_SUB %0, %1
- %rax = COPY %2(s32)
-
-...
-
----
-name: test_add_float
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = ADDSSrr %0, %1
-# AVX-NEXT: %2 = VADDSSrr %0, %1
-# AVX512F-NEXT: %2 = VADDSSZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s32) = COPY %xmm0
- %1(s32) = COPY %xmm1
- %2(s32) = G_FADD %0, %1
- %xmm0 = COPY %2(s32)
- RET 0, implicit %xmm0
-
-...
----
-name: test_add_double
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = ADDSDrr %0, %1
-# AVX-NEXT: %2 = VADDSDrr %0, %1
-# AVX512F-NEXT: %2 = VADDSDZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s64) = COPY %xmm0
- %1(s64) = COPY %xmm1
- %2(s64) = G_FADD %0, %1
- %xmm0 = COPY %2(s64)
- RET 0, implicit %xmm0
-
-...
----
-name: test_sub_float
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = SUBSSrr %0, %1
-# AVX-NEXT: %2 = VSUBSSrr %0, %1
-# AVX512F-NEXT: %2 = VSUBSSZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s32) = COPY %xmm0
- %1(s32) = COPY %xmm1
- %2(s32) = G_FSUB %0, %1
- %xmm0 = COPY %2(s32)
- RET 0, implicit %xmm0
-
-...
----
-name: test_sub_double
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = SUBSDrr %0, %1
-# AVX-NEXT: %2 = VSUBSDrr %0, %1
-# AVX512F-NEXT: %2 = VSUBSDZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s64) = COPY %xmm0
- %1(s64) = COPY %xmm1
- %2(s64) = G_FSUB %0, %1
- %xmm0 = COPY %2(s64)
- RET 0, implicit %xmm0
-...
----
-name: test_add_v4i32
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = PADDDrr %0, %1
-# AVX-NEXT: %2 = VPADDDrr %0, %1
-# AVX512F-NEXT: %2 = VPADDDrr %0, %1
-# AVX512VL-NEXT: %2 = VPADDDZ128rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(<4 x s32>) = COPY %xmm0
- %1(<4 x s32>) = COPY %xmm1
- %2(<4 x s32>) = G_ADD %0, %1
- %xmm0 = COPY %2(<4 x s32>)
- RET 0, implicit %xmm0
-
-...
----
-name: test_sub_v4i32
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = PSUBDrr %0, %1
-# AVX-NEXT: %2 = VPSUBDrr %0, %1
-# AVX512F-NEXT: %2 = VPSUBDrr %0, %1
-# AVX512VL-NEXT: %2 = VPSUBDZ128rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(<4 x s32>) = COPY %xmm0
- %1(<4 x s32>) = COPY %xmm1
- %2(<4 x s32>) = G_SUB %0, %1
- %xmm0 = COPY %2(<4 x s32>)
- RET 0, implicit %xmm0
-
-...
----
-name: test_add_v4f32
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = ADDPSrr %0, %1
-# AVX-NEXT: %2 = VADDPSrr %0, %1
-# AVX512F-NEXT: %2 = VADDPSrr %0, %1
-# AVX512VL-NEXT: %2 = VADDPSZ128rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(<4 x s32>) = COPY %xmm0
- %1(<4 x s32>) = COPY %xmm1
- %2(<4 x s32>) = G_FADD %0, %1
- %xmm0 = COPY %2(<4 x s32>)
- RET 0, implicit %xmm0
-
-...
----
-name: test_sub_v4f32
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = SUBPSrr %0, %1
-# AVX-NEXT: %2 = VSUBPSrr %0, %1
-# AVX512F-NEXT: %2 = VSUBPSrr %0, %1
-# AVX512VL-NEXT: %2 = VSUBPSZ128rr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(<4 x s32>) = COPY %xmm0
- %1(<4 x s32>) = COPY %xmm1
- %2(<4 x s32>) = G_FSUB %0, %1
- %xmm0 = COPY %2(<4 x s32>)
- RET 0, implicit %xmm0
-
-...
---
# ALL-LABEL: name: test_load_i8
name: test_load_i8
diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir
new file mode 100644
index 0000000000000..d4db6eec6d802
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-sub.mir
@@ -0,0 +1,225 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+
+--- |
+ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
+ %ret = sub i64 %arg1, %arg2
+ ret i64 %ret
+ }
+
+ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
+ %ret = sub i32 %arg1, %arg2
+ ret i32 %ret
+ }
+
+ define float @test_sub_float(float %arg1, float %arg2) {
+ %ret = fsub float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_sub_double(double %arg1, double %arg2) {
+ %ret = fsub double %arg1, %arg2
+ ret double %ret
+ }
+
+ define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %ret = sub <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+ }
+
+ define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
+ %ret = fsub <4 x float> %arg1, %arg2
+ ret <4 x float> %ret
+ }
+
+...
+---
+name: test_sub_i64
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr64 }
+# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 2, class: gr64 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL-NEXT: %1 = COPY %rsi
+# ALL-NEXT: %2 = SUB64rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s64) = COPY %rdi
+ %1(s64) = COPY %rsi
+ %2(s64) = G_SUB %0, %1
+ %rax = COPY %2(s64)
+
+...
+
+---
+name: test_sub_i32
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr32 }
+# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 2, class: gr32 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %edi
+# ALL-NEXT: %1 = COPY %esi
+# ALL-NEXT: %2 = SUB32rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s32) = COPY %edi
+ %1(s32) = COPY %esi
+ %2(s32) = G_SUB %0, %1
+ %rax = COPY %2(s32)
+
+...
+---
+name: test_sub_float
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = SUBSSrr %0, %1
+# AVX-NEXT: %2 = VSUBSSrr %0, %1
+# AVX512F-NEXT: %2 = VSUBSSZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_sub_double
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = SUBSDrr %0, %1
+# AVX-NEXT: %2 = VSUBSDrr %0, %1
+# AVX512F-NEXT: %2 = VSUBSDZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+...
+---
+name: test_sub_v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = PSUBDrr %0, %1
+# AVX-NEXT: %2 = VPSUBDrr %0, %1
+# AVX512F-NEXT: %2 = VPSUBDrr %0, %1
+# AVX512VL-NEXT: %2 = VPSUBDZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_SUB %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_sub_v4f32
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+# ALL: registers:
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %0 = COPY %xmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = SUBPSrr %0, %1
+# AVX-NEXT: %2 = VSUBPSrr %0, %1
+# AVX512F-NEXT: %2 = VSUBPSrr %0, %1
+# AVX512VL-NEXT: %2 = VSUBPSZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_FSUB %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir
new file mode 100644
index 0000000000000..714340248ff6f
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir
@@ -0,0 +1,183 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select %s -o - | FileCheck %s --check-prefix=CHECK
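+# A scalar G_TRUNC on x86-64 is expected to select to a plain subregister
+# COPY (sub_8/sub_16/sub_32); the checks below verify the chosen register
+# classes and the COPY for each source/destination width.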
+--- |
+ define i1 @trunc_i32toi1(i32 %a) {
+ %r = trunc i32 %a to i1
+ ret i1 %r
+ }
+
+ define i8 @trunc_i32toi8(i32 %a) {
+ %r = trunc i32 %a to i8
+ ret i8 %r
+ }
+
+ define i16 @trunc_i32toi16(i32 %a) {
+ %r = trunc i32 %a to i16
+ ret i16 %r
+ }
+
+ define i8 @trunc_i64toi8(i64 %a) {
+ %r = trunc i64 %a to i8
+ ret i8 %r
+ }
+
+ define i16 @trunc_i64toi16(i64 %a) {
+ %r = trunc i64 %a to i16
+ ret i16 %r
+ }
+
+ define i32 @trunc_i64toi32(i64 %a) {
+ %r = trunc i64 %a to i32
+ ret i32 %r
+ }
+
+...
+---
+name: trunc_i32toi1
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i32toi1
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 1, class: gr8 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_8
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi
+
+ %0(s32) = COPY %edi
+ %1(s1) = G_TRUNC %0(s32)
+ %al = COPY %1(s1)
+ RET 0, implicit %al
+
+...
+---
+name: trunc_i32toi8
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i32toi8
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 1, class: gr8 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_8
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi
+
+ %0(s32) = COPY %edi
+ %1(s8) = G_TRUNC %0(s32)
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: trunc_i32toi16
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i32toi16
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 1, class: gr16 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_16
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi
+
+ %0(s32) = COPY %edi
+ %1(s16) = G_TRUNC %0(s32)
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: trunc_i64toi8
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i64toi8
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 1, class: gr8 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_8
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(s64) = COPY %rdi
+ %1(s8) = G_TRUNC %0(s64)
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: trunc_i64toi16
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i64toi16
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 1, class: gr16 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_16
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(s64) = COPY %rdi
+ %1(s16) = G_TRUNC %0(s64)
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: trunc_i64toi32
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK-LABEL: name: trunc_i64toi32
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 1, class: gr32 }
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# CHECK: body:
+# CHECK: %1 = COPY %0.sub_32
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(s64) = COPY %rdi
+ %1(s32) = G_TRUNC %0(s64)
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/trunc.ll b/test/CodeGen/X86/GlobalISel/trunc.ll
new file mode 100644
index 0000000000000..a56fc3b5a87f4
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/trunc.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=CHECK
+
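+; Each of these truncates should lower to a single register-to-register move;
+; the narrower result is simply the low part of %edi/%eax.
+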
+define i1 @trunc_i32toi1(i32 %a) {
+; CHECK-LABEL: trunc_i32toi1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i32 %a to i1
+ ret i1 %r
+}
+
+define i8 @trunc_i32toi8(i32 %a) {
+; CHECK-LABEL: trunc_i32toi8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i32 %a to i8
+ ret i8 %r
+}
+
+define i16 @trunc_i32toi16(i32 %a) {
+; CHECK-LABEL: trunc_i32toi16:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i32 %a to i16
+ ret i16 %r
+}
+
+define i8 @trunc_i64toi8(i64 %a) {
+; CHECK-LABEL: trunc_i64toi8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i64 %a to i8
+ ret i8 %r
+}
+
+define i16 @trunc_i64toi16(i64 %a) {
+; CHECK-LABEL: trunc_i64toi16:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i64 %a to i16
+ ret i16 %r
+}
+
+define i32 @trunc_i64toi32(i64 %a) {
+; CHECK-LABEL: trunc_i64toi32:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %r = trunc i64 %a to i32
+ ret i32 %r
+}
+
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index 4303b62544642..f89f6e1de1abe 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -582,3 +582,22 @@ define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
; CHECK-NEXT: movq %rcx, 40(%rdi)
; CHECK-NEXT: retq
}
+
+
+
+; Don't let a non-consecutive store thwart merging of the last two.
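+; The byte stores of 2 and 3 (offsets 2 and 3) should still merge into one
+; 16-bit store of 0x0302 = 770 (little-endian), as the CHECK-DAG lines expect.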
+define void @almost_consecutive_stores(i8* %p) {
+ store i8 0, i8* %p
+ %p1 = getelementptr i8, i8* %p, i64 42
+ store i8 1, i8* %p1
+ %p2 = getelementptr i8, i8* %p, i64 2
+ store i8 2, i8* %p2
+ %p3 = getelementptr i8, i8* %p, i64 3
+ store i8 3, i8* %p3
+ ret void
+; CHECK-LABEL: almost_consecutive_stores
+; CHECK-DAG: movb $0, (%rdi)
+; CHECK-DAG: movb $1, 42(%rdi)
+; CHECK-DAG: movw $770, 2(%rdi)
+; CHECK: retq
+}
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index e9e7d5aea2737..89abbabee27ce 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256:
@@ -271,3 +271,35 @@ entry:
ret <2 x i64> %x
}
+define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
+; AVX-LABEL: and_xor_splat1_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: and_xor_splat1_v4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
+; AVX-LABEL: and_xor_splat1_v4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: and_xor_splat1_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
+ %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %and
+}
+
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll
index 796ee83b6fa79..b31b00e54e83a 100644
--- a/test/CodeGen/X86/avx512-ext.ll
+++ b/test/CodeGen/X86/avx512-ext.ll
@@ -542,7 +542,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; KNL: ## BB#0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
@@ -923,7 +923,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
; KNL: ## BB#0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
@@ -1110,7 +1110,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
; KNL: ## BB#0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
@@ -1173,7 +1173,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind
; KNL: ## BB#0:
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
-; KNL-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index aec1339d653da..7103efe050a49 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1430,7 +1430,8 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -1447,7 +1448,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
@@ -1457,7 +1459,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
@@ -1471,7 +1474,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1489,7 +1492,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1500,7 +1503,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/test/CodeGen/X86/bswap_tree.ll b/test/CodeGen/X86/bswap_tree.ll
new file mode 100644
index 0000000000000..35a28af855796
--- /dev/null
+++ b/test/CodeGen/X86/bswap_tree.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK64
+
+; Check reconstructing bswap from shifted masks and a tree of ORs
+
+; Match a 32-bit packed halfword bswap. That is
+; ((x & 0x000000ff) << 8) |
+; ((x & 0x0000ff00) >> 8) |
+; ((x & 0x00ff0000) << 8) |
+; ((x & 0xff000000) >> 8)
+; => (rotl (bswap x), 16)
+define i32 @test1(i32 %x) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $16711680, %edx # imm = 0xFF0000
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: andl $-16777216, %eax # imm = 0xFF000000
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: shrl $8, %eax
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: shrl $16, %ecx
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: test1:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK64-NEXT: movl %edi, %ecx
+; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000
+; CHECK64-NEXT: shll $8, %eax
+; CHECK64-NEXT: shrl $8, %ecx
+; CHECK64-NEXT: bswapl %edi
+; CHECK64-NEXT: shrl $16, %edi
+; CHECK64-NEXT: orl %eax, %ecx
+; CHECK64-NEXT: leal (%rcx,%rdi), %eax
+; CHECK64-NEXT: retq
+ %byte0 = and i32 %x, 255 ; 0x000000ff
+ %byte1 = and i32 %x, 65280 ; 0x0000ff00
+ %byte2 = and i32 %x, 16711680 ; 0x00ff0000
+ %byte3 = and i32 %x, 4278190080 ; 0xff000000
+ %tmp0 = shl i32 %byte0, 8
+ %tmp1 = lshr i32 %byte1, 8
+ %tmp2 = shl i32 %byte2, 8
+ %tmp3 = lshr i32 %byte3, 8
+ %or0 = or i32 %tmp0, %tmp1
+ %or1 = or i32 %tmp2, %tmp3
+ %result = or i32 %or0, %or1
+ ret i32 %result
+}
+
+; The same as test1, but with the shifts before the "and"
+; ((x << 8) & 0x0000ff00) |
+; ((x >> 8) & 0x000000ff) |
+; ((x << 8) & 0xff000000) |
+; ((x >> 8) & 0x00ff0000)
+define i32 @test2(i32 %x) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $8, %ecx
+; CHECK-NEXT: shrl $8, %eax
+; CHECK-NEXT: movzwl %cx, %edx
+; CHECK-NEXT: movzbl %al, %esi
+; CHECK-NEXT: andl $-16777216, %ecx # imm = 0xFF000000
+; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK-NEXT: orl %edx, %esi
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: test2:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movl %edi, %ecx
+; CHECK64-NEXT: shll $8, %ecx
+; CHECK64-NEXT: shrl $8, %edi
+; CHECK64-NEXT: movzwl %cx, %edx
+; CHECK64-NEXT: movzbl %dil, %eax
+; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000
+; CHECK64-NEXT: andl $16711680, %edi # imm = 0xFF0000
+; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: orl %ecx, %edi
+; CHECK64-NEXT: orl %edi, %eax
+; CHECK64-NEXT: retq
+ %byte1 = shl i32 %x, 8
+ %byte0 = lshr i32 %x, 8
+ %byte3 = shl i32 %x, 8
+ %byte2 = lshr i32 %x, 8
+ %tmp1 = and i32 %byte1, 65280 ; 0x0000ff00
+ %tmp0 = and i32 %byte0, 255 ; 0x000000ff
+ %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000
+ %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000
+ %or0 = or i32 %tmp0, %tmp1
+ %or1 = or i32 %tmp2, %tmp3
+ %result = or i32 %or0, %or1
+ ret i32 %result
+}
diff --git a/test/CodeGen/X86/bswap_tree2.ll b/test/CodeGen/X86/bswap_tree2.ll
new file mode 100644
index 0000000000000..a9c74df9d0d91
--- /dev/null
+++ b/test/CodeGen/X86/bswap_tree2.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK64
+
+; Check a few invalid patterns for halfword bswap pattern matching
+
+; Don't match a near-miss 32-bit packed halfword bswap
+; (with only half of the swap tree valid).
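+; Here %byte3 is built with an 'or' rather than an 'and', so only the
+; low-halfword legs still form a bswap: bswap(x) >> 16 equals
+; ((x & 0xff) << 8) | ((x & 0xff00) >> 8), which is the piece the expected
+; code below still covers with bswapl plus a 16-bit shift.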
+define i32 @test1(i32 %x) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $16711680, %edx # imm = 0xFF0000
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: orl $-16777216, %eax # imm = 0xFF000000
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: shrl $8, %eax
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: shrl $16, %ecx
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: test1:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movl %edi, %ecx
+; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64-NEXT: orl $-16777216, %eax # imm = 0xFF000000
+; CHECK64-NEXT: shll $8, %ecx
+; CHECK64-NEXT: shrl $8, %eax
+; CHECK64-NEXT: bswapl %edi
+; CHECK64-NEXT: shrl $16, %edi
+; CHECK64-NEXT: orl %ecx, %eax
+; CHECK64-NEXT: orl %edi, %eax
+; CHECK64-NEXT: retq
+ %byte0 = and i32 %x, 255 ; 0x000000ff
+ %byte1 = and i32 %x, 65280 ; 0x0000ff00
+ %byte2 = and i32 %x, 16711680 ; 0x00ff0000
+ %byte3 = or i32 %x, 4278190080 ; 0xff000000
+ %tmp0 = shl i32 %byte0, 8
+ %tmp1 = lshr i32 %byte1, 8
+ %tmp2 = shl i32 %byte2, 8
+ %tmp3 = lshr i32 %byte3, 8
+ %or0 = or i32 %tmp0, %tmp1
+ %or1 = or i32 %tmp2, %tmp3
+ %result = or i32 %or0, %or1
+ ret i32 %result
+}
+
+; Don't match a near-miss 32-bit packed halfword bswap
+; (with swapped lshr/shl)
+; ((x >> 8) & 0x0000ff00) |
+; ((x << 8) & 0x000000ff) |
+; ((x << 8) & 0xff000000) |
+; ((x >> 8) & 0x00ff0000)
+define i32 @test2(i32 %x) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: shrl $8, %eax
+; CHECK-NEXT: shll $8, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $65280, %edx # imm = 0xFF00
+; CHECK-NEXT: andl $-16777216, %ecx # imm = 0xFF000000
+; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: test2:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64-NEXT: shrl $8, %eax
+; CHECK64-NEXT: shll $8, %edi
+; CHECK64-NEXT: movl %eax, %ecx
+; CHECK64-NEXT: andl $65280, %ecx # imm = 0xFF00
+; CHECK64-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK64-NEXT: orl %edi, %eax
+; CHECK64-NEXT: leal (%rax,%rcx), %eax
+; CHECK64-NEXT: retq
+ %byte1 = lshr i32 %x, 8
+ %byte0 = shl i32 %x, 8
+ %byte3 = shl i32 %x, 8
+ %byte2 = lshr i32 %x, 8
+ %tmp1 = and i32 %byte1, 65280 ; 0x0000ff00
+ %tmp0 = and i32 %byte0, 255 ; 0x000000ff
+ %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000
+ %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000
+ %or0 = or i32 %tmp0, %tmp1
+ %or1 = or i32 %tmp2, %tmp3
+ %result = or i32 %or0, %or1
+ ret i32 %result
+}
+
+; Invalid pattern involving a unary op
+define i32 @test3(float %x) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
+; CHECK-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK-NEXT: fistpl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: shrl $8, %eax
+; CHECK-NEXT: andl $65280, %ecx # imm = 0xFF00
+; CHECK-NEXT: andl $-16777216, %edx # imm = 0xFF000000
+; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: test3:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: cvttss2si %xmm0, %ecx
+; CHECK64-NEXT: movl %ecx, %edx
+; CHECK64-NEXT: shll $8, %edx
+; CHECK64-NEXT: movl %ecx, %eax
+; CHECK64-NEXT: shrl $8, %eax
+; CHECK64-NEXT: andl $65280, %ecx # imm = 0xFF00
+; CHECK64-NEXT: andl $-16777216, %edx # imm = 0xFF000000
+; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: orl %ecx, %eax
+; CHECK64-NEXT: retq
+ %integer = fptosi float %x to i32
+ %byte0 = shl i32 %integer, 8
+ %byte3 = shl i32 %integer, 8
+ %byte2 = lshr i32 %integer, 8
+ %tmp1 = and i32 %integer, 65280 ; 0x0000ff00
+ %tmp0 = and i32 %byte0, 255 ; 0x000000ff
+ %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000
+ %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000
+ %or0 = or i32 %tmp0, %tmp1
+ %or1 = or i32 %tmp2, %tmp3
+ %result = or i32 %or0, %or1
+ ret i32 %result
+}
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index e4cf296432ba9..d7f52d2479885 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %or
}
+; TODO: Why would we do this?
; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
- %1 = and <2 x i64> %a0, <i64 1, i64 1>
+ %1 = and <2 x i64> %a0, <i64 7, i64 7>
%2 = or <2 x i64> %1, <i64 3, i64 3>
ret <2 x i64> %2
}
+; If all masked bits are going to be set, that's a constant fold.
+
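A quick worked check of the two rewrites discussed above (editorial note, not part of the patch). The identity (or (and X, c1), c2) == (and (or X, c2), c1|c2) holds bit by bit: wherever c2 has a 1, both sides are 1; everywhere else both sides reduce to the corresponding bit of (and X, c1). For or_and_v2i64 that means (X & 7) | 3 == (X | 3) & 7, since 7|3 = 7. For or_and_v4i32 below, the only bit kept by the `and` mask (bit 0 of c1 = 1) is already set by c2 = 3, so (X & 1) | 3 == 3 for any X, which is why the expected code collapses to the constant vector [3,3,3,3].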
define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32:
; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
%2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: or_zext_v2i32:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; CHECK-NEXT: retq
%1 = zext <2 x i32> %a0 to <2 x i64>
%2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: or_zext_v4i16:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; CHECK-NEXT: retq
%1 = zext <4 x i16> %a0 to <4 x i32>
%2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
diff --git a/test/CodeGen/X86/dbg-baseptr.ll b/test/CodeGen/X86/dbg-baseptr.ll
new file mode 100644
index 0000000000000..f69c78af73677
--- /dev/null
+++ b/test/CodeGen/X86/dbg-baseptr.ll
@@ -0,0 +1,75 @@
+; RUN: llc -o - %s | FileCheck %s
+; This test checks that parameters passed on the stack are correctly
+; referenced by debug info.
+target triple = "x86_64--"
+
+@glob = external global i64
+@ptr = external global i32*
+%struct.s = type { i32, i32, i32, i32, i32 }
+
+; CHECK-LABEL: f0:
+; CHECK: DEBUG_VALUE: f:input <- [%RSP+8]
+define i32 @f0(%struct.s* byval align 8 %input) !dbg !8 {
+ call void @llvm.dbg.declare(metadata %struct.s* %input, metadata !4, metadata !17), !dbg !18
+ ret i32 42
+}
+
+; CHECK-LABEL: f1:
+; CHECK: DEBUG_VALUE: f:input <- [%RBP+16]
+define i32 @f1(%struct.s* byval align 8 %input) !dbg !8 {
+ %val = load i64, i64* @glob
+ ; this alloca should force FP usage.
+ %stackspace = alloca i32, i64 %val, align 1
+ store i32* %stackspace, i32** @ptr
+ call void @llvm.dbg.declare(metadata %struct.s* %input, metadata !4, metadata !17), !dbg !18
+ ret i32 42
+}
+
+; CHECK-LABEL: f2:
+; Just check that we are indeed aligning the stack and setting up a base pointer
+; in RBX.
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %rbx
+; CHECK: andq $-64, %rsp
+; CHECK: subq $64, %rsp
+; CHECK: movq %rsp, %rbx
+; The parameter should still be referenced through RBP though.
+; CHECK-NOT: DEBUG_VALUE: f:input <- [%RBX
+; CHECK: DEBUG_VALUE: f:input <- [%RBP+16]
+define i32 @f2(%struct.s* byval align 8 %input) !dbg !8 {
+ %val = load i64, i64* @glob
+ %stackspace = alloca i32, i64 %val, align 64
+ store i32* %stackspace, i32** @ptr
+ call void @llvm.dbg.declare(metadata %struct.s* %input, metadata !4, metadata !17), !dbg !18
+ ret i32 42
+}
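For reference, the offsets checked above follow from the standard x86-64 frame layout (an editorial note; the test itself only checks the DEBUG_VALUE strings):

  [%rsp + 0] return address                          -> byval %input at [%rsp + 8]  (f0, no frame pointer)
  [%rbp + 0] saved %rbp, [%rbp + 8] return address   -> byval %input at [%rbp + 16] (f1/f2, after pushq %rbp; movq %rsp, %rbp)

Realigning %rsp and parking the realigned value in %rbx (f2) does not move the incoming argument area, so the parameter is still described relative to %rbp.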
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3)
+!3 = !DIFile(filename: "dbg-baseptr.ll", directory: "/")
+!4 = !DILocalVariable(name: "input", arg: 1, scope: !8, file: !3, line: 5, type: !9)
+!5 = !{}
+
+!6 = !DISubroutineType(types: !7)
+!7 = !{!10, !9}
+
+!8 = distinct !DISubprogram(name: "f", file: !3, line: 5, type: !6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, unit: !2, variables: !5)
+
+!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", elements: !11)
+!10 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!11 = !{!12, !13, !14, !15, !16}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", baseType: !10, size: 32)
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "b", baseType: !10, size: 32, offset: 32)
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "c", baseType: !10, size: 32, offset: 64)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "d", baseType: !10, size: 32, offset: 96)
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "e", baseType: !10, size: 32, offset: 128)
+
+!17 = !DIExpression()
+!18 = !DILocation(line: 5, scope: !8)
diff --git a/test/CodeGen/X86/extract-store.ll b/test/CodeGen/X86/extract-store.ll
index 1751f03731d3a..5286a1b635d1a 100644
--- a/test/CodeGen/X86/extract-store.ll
+++ b/test/CodeGen/X86/extract-store.ll
@@ -5,6 +5,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=AVX-X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=AVX-X64
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking \
+; RUN: | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking \
+; RUN: | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128
define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; SSE2-X32-LABEL: extract_i8_0:
@@ -458,6 +462,26 @@ define void @extract_f64_1(double* nocapture %dst, <2 x double> %foo) nounwind {
ret void
}
+define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
+; SSE-F128-LABEL: extract_f128_0:
+; SSE-F128: # BB#0:
+; SSE-F128-NEXT: movaps %xmm0, (%rdi)
+; SSE-F128-NEXT: retq
+ %vecext = extractelement <2 x fp128> %foo, i32 0
+ store fp128 %vecext, fp128* %dst, align 1
+ ret void
+}
+
+define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
+; SSE-F128-LABEL: extract_f128_1:
+; SSE-F128: # BB#0:
+; SSE-F128-NEXT: movaps %xmm1, (%rdi)
+; SSE-F128-NEXT: retq
+ %vecext = extractelement <2 x fp128> %foo, i32 1
+ store fp128 %vecext, fp128* %dst, align 1
+ ret void
+}
+
define void @extract_i8_undef(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; X32-LABEL: extract_i8_undef:
; X32: # BB#0:
@@ -535,3 +559,16 @@ define void @extract_f64_undef(double* nocapture %dst, <2 x double> %foo) nounwi
store double %vecext, double* %dst, align 1
ret void
}
+
+define void @extract_f128_undef(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
+; X32-LABEL: extract_f128_undef:
+; X32: # BB#0:
+; X32-NEXT: retl
+;
+; X64-LABEL: extract_f128_undef:
+; X64: # BB#0:
+; X64-NEXT: retq
+ %vecext = extractelement <2 x fp128> %foo, i32 2 ; undef
+ store fp128 %vecext, fp128* %dst, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/fp128-extract.ll b/test/CodeGen/X86/fp128-extract.ll
new file mode 100644
index 0000000000000..5006ac898c717
--- /dev/null
+++ b/test/CodeGen/X86/fp128-extract.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \
+; RUN: -enable-legalize-types-checking | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \
+; RUN: -enable-legalize-types-checking | FileCheck %s
+
+; Test the softened result of extractelement op code.
+define fp128 @TestExtract(<2 x double> %x) {
+entry:
+ ; Simplified instruction pattern from the output of llvm before r289042,
+ ; for a boost function ...::insert<...>::traverse<...>().
+ %a = fpext <2 x double> %x to <2 x fp128>
+ %0 = extractelement <2 x fp128> %a, i32 0
+ %1 = extractelement <2 x fp128> %a, i32 1
+ %2 = fmul fp128 %0, %1
+ ret fp128 %2
+; CHECK-LABEL: TestExtract:
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: callq __extenddftf2
+; CHECK: callq __extenddftf2
+; CHECK: callq __multf3
+; CHECK: retq
+}
diff --git a/test/CodeGen/X86/i64-to-float.ll b/test/CodeGen/X86/i64-to-float.ll
index da92bdb55d7c6..3da1a360e2904 100644
--- a/test/CodeGen/X86/i64-to-float.ll
+++ b/test/CodeGen/X86/i64-to-float.ll
@@ -224,36 +224,32 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
-; X64-SSE-NEXT: pxor %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: pxor %xmm3, %xmm1
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm4, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
-; X64-SSE-NEXT: pand %xmm2, %xmm1
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %rax
; X64-SSE-NEXT: xorps %xmm0, %xmm0
diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll
index cea9ac26edbc5..4c3c8bbd793e5 100644
--- a/test/CodeGen/X86/known-signbits-vector.ll
+++ b/test/CodeGen/X86/known-signbits-vector.ll
@@ -100,21 +100,27 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind {
define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X32: # BB#0:
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl 12(%ebp), %ecx
; X32-NEXT: shrdl $30, %ecx, %eax
; X32-NEXT: sarl $30, %ecx
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, 16(%ebp), %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, 20(%ebp), %xmm0, %xmm0
+; X32-NEXT: vpsrad $3, %xmm0, %xmm1
; X32-NEXT: vpsrlq $3, %xmm0, %xmm0
-; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
-; X32-NEXT: vmovss %xmm0, (%esp)
-; X32-NEXT: flds (%esp)
-; X32-NEXT: popl %eax
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
@@ -127,7 +133,7 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
; X64-NEXT: vpsrlq $3, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT: retq
%1 = ashr i64 %a0, 30
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll
index fdc5ace8d9bcf..d332b2f3169f0 100644
--- a/test/CodeGen/X86/madd.ll
+++ b/test/CodeGen/X86/madd.ll
@@ -1,27 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512
-;SSE2-label: @_Z10test_shortPsS_i
-;SSE2: movdqu
-;SSE2-NEXT: movdqu
-;SSE2-NEXT: pmaddwd
-;SSE2-NEXT: paddd
-
-;AVX2-label: @_Z10test_shortPsS_i
-;AVX2: vmovdqu
-;AVX2-NEXT: vpmaddwd
-;AVX2-NEXT: vinserti128
-;AVX2-NEXT: vpaddd
-
-;AVX512-label: @_Z10test_shortPsS_i
-;AVX512: vmovdqu
-;AVX512-NEXT: vpmaddwd
-;AVX512-NEXT: vinserti128
-;AVX512-NEXT: vpaddd
-
define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 {
+; SSE2-LABEL: _Z10test_shortPsS_i:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movl %edx, %eax
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: .p2align 4, 0x90
+; SSE2-NEXT: .LBB0_1: # %vector.body
+; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT: movdqu (%rdi), %xmm2
+; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: pmaddwd %xmm2, %xmm3
+; SSE2-NEXT: paddd %xmm3, %xmm1
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: jne .LBB0_1
+; SSE2-NEXT: # BB#2: # %middle.block
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: _Z10test_shortPsS_i:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: movl %edx, %eax
+; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: .p2align 4, 0x90
+; AVX2-NEXT: .LBB0_1: # %vector.body
+; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT: vmovdqu (%rsi), %xmm2
+; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: jne .LBB0_1
+; AVX2-NEXT: # BB#2: # %middle.block
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: _Z10test_shortPsS_i:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: movl %edx, %eax
+; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: .p2align 4, 0x90
+; AVX512-NEXT: .LBB0_1: # %vector.body
+; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX512-NEXT: vmovdqu (%rsi), %xmm2
+; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: jne .LBB0_1
+; AVX512-NEXT: # BB#2: # %middle.block
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
entry:
%3 = zext i32 %2 to i64
br label %vector.body
@@ -54,20 +113,227 @@ middle.block:
ret i32 %13
}
-;AVX2-label: @_Z9test_charPcS_i
-;AVX2: vpmovsxbw
-;AVX2-NEXT: vpmovsxbw
-;AVX2-NEXT: vpmaddwd
-;AVX2-NEXT: vpaddd
+define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 {
+; SSE2-LABEL: test_unsigned_short:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movl %edx, %eax
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: .p2align 4, 0x90
+; SSE2-NEXT: .LBB1_1: # %vector.body
+; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT: movdqu (%rdi), %xmm2
+; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pmulhuw %xmm2, %xmm4
+; SSE2-NEXT: pmullw %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT: paddd %xmm3, %xmm1
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: jne .LBB1_1
+; SSE2-NEXT: # BB#2: # %middle.block
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: test_unsigned_short:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: movl %edx, %eax
+; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: .p2align 4, 0x90
+; AVX2-NEXT: .LBB1_1: # %vector.body
+; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: jne .LBB1_1
+; AVX2-NEXT: # BB#2: # %middle.block
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_short:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: movl %edx, %eax
+; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: .p2align 4, 0x90
+; AVX512-NEXT: .LBB1_1: # %vector.body
+; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1
+; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: jne .LBB1_1
+; AVX512-NEXT: # BB#2: # %middle.block
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %3 = zext i32 %2 to i64
+ br label %vector.body
-;AVX512-label: @_Z9test_charPcS_i
-;AVX512: vpmovsxbw
-;AVX512-NEXT: vpmovsxbw
-;AVX512-NEXT: vpmaddwd
-;AVX512-NEXT: vinserti64x4
-;AVX512-NEXT: vpaddd
+vector.body:
+ %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+ %vec.phi = phi <8 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ]
+ %4 = getelementptr inbounds i16, i16* %0, i64 %index
+ %5 = bitcast i16* %4 to <8 x i16>*
+ %wide.load = load <8 x i16>, <8 x i16>* %5, align 2
+ %6 = zext <8 x i16> %wide.load to <8 x i32>
+ %7 = getelementptr inbounds i16, i16* %1, i64 %index
+ %8 = bitcast i16* %7 to <8 x i16>*
+ %wide.load14 = load <8 x i16>, <8 x i16>* %8, align 2
+ %9 = zext <8 x i16> %wide.load14 to <8 x i32>
+ %10 = mul nsw <8 x i32> %9, %6
+ %11 = add nsw <8 x i32> %10, %vec.phi
+ %index.next = add i64 %index, 8
+ %12 = icmp eq i64 %index.next, %3
+ br i1 %12, label %middle.block, label %vector.body
+
+middle.block:
+ %rdx.shuf = shufflevector <8 x i32> %11, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx = add <8 x i32> %11, %rdx.shuf
+ %rdx.shuf15 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx16 = add <8 x i32> %bin.rdx, %rdx.shuf15
+ %rdx.shuf17 = shufflevector <8 x i32> %bin.rdx16, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx18 = add <8 x i32> %bin.rdx16, %rdx.shuf17
+ %13 = extractelement <8 x i32> %bin.rdx18, i32 0
+ ret i32 %13
+}
define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i32) local_unnamed_addr #0 {
+; SSE2-LABEL: _Z9test_charPcS_i:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movl %edx, %eax
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: .p2align 4, 0x90
+; SSE2-NEXT: .LBB2_1: # %vector.body
+; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm4
+; SSE2-NEXT: movq {{.*#+}} xmm5 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm5
+; SSE2-NEXT: pmullw %xmm4, %xmm5
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; SSE2-NEXT: psrad $16, %xmm4
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psrad $16, %xmm5
+; SSE2-NEXT: movq {{.*#+}} xmm6 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm6
+; SSE2-NEXT: movq {{.*#+}} xmm7 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm7
+; SSE2-NEXT: pmullw %xmm6, %xmm7
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: psrad $16, %xmm6
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psrad $16, %xmm7
+; SSE2-NEXT: paddd %xmm7, %xmm2
+; SSE2-NEXT: paddd %xmm6, %xmm3
+; SSE2-NEXT: paddd %xmm5, %xmm1
+; SSE2-NEXT: paddd %xmm4, %xmm0
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-16, %rax
+; SSE2-NEXT: jne .LBB2_1
+; SSE2-NEXT: # BB#2: # %middle.block
+; SSE2-NEXT: paddd %xmm3, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm1
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: _Z9test_charPcS_i:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: movl %edx, %eax
+; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: .p2align 4, 0x90
+; AVX2-NEXT: .LBB2_1: # %vector.body
+; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2
+; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-16, %rax
+; AVX2-NEXT: jne .LBB2_1
+; AVX2-NEXT: # BB#2: # %middle.block
+; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: _Z9test_charPcS_i:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: movl %edx, %eax
+; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: .p2align 4, 0x90
+; AVX512-NEXT: .LBB2_1: # %vector.body
+; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2
+; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
+; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-16, %rax
+; AVX512-NEXT: jne .LBB2_1
+; AVX512-NEXT: # BB#2: # %middle.block
+; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
entry:
%3 = zext i32 %2 to i64
br label %vector.body
diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll
index 31c1f65824260..dcb7bd010e56b 100644
--- a/test/CodeGen/X86/merge_store.ll
+++ b/test/CodeGen/X86/merge_store.ll
@@ -28,3 +28,34 @@ entry:
for.end:
ret void
}
+
+
+
+;; CHECK-LABEL: indexed-store-merge
+
+;; We should be able to merge the 4 consecutive stores.
+;; FIXMECHECK: movl $0, 2(%rsi,%rdi)
+
+;; CHECK: movb $0, 2(%rsi,%rdi)
+;; CHECK: movb $0, 3(%rsi,%rdi)
+;; CHECK: movb $0, 4(%rsi,%rdi)
+;; CHECK: movb $0, 5(%rsi,%rdi)
+;; CHECK: movb $0, (%rsi)
+define void @indexed-store-merge(i64 %p, i8* %v) {
+entry:
+ %p2 = add nsw i64 %p, 2
+ %v2 = getelementptr i8, i8* %v, i64 %p2
+ store i8 0, i8* %v2, align 2
+ %p3 = add nsw i64 %p, 3
+ %v3 = getelementptr i8, i8* %v, i64 %p3
+ store i8 0, i8* %v3, align 1
+ %p4 = add nsw i64 %p, 4
+ %v4 = getelementptr i8, i8* %v, i64 %p4
+ store i8 0, i8* %v4, align 2
+ %p5 = add nsw i64 %p, 5
+ %v5 = getelementptr i8, i8* %v, i64 %p5
+ store i8 0, i8* %v5, align 1
+ %v0 = getelementptr i8, i8* %v, i64 0
+ store i8 0, i8* %v0, align 2
+ ret void
+}
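A sketch (assumption, not part of the test) of the merged form the FIXMECHECK line is asking for: the four adjacent i8 zero stores at %p+2 through %p+5 are equivalent to a single i32 zero store at %p+2 (the stored value is zero, so endianness does not matter), e.g.:

  %p2 = add nsw i64 %p, 2
  %v2 = getelementptr i8, i8* %v, i64 %p2
  %v2.cast = bitcast i8* %v2 to i32*
  store i32 0, i32* %v2.cast, align 2   ; covers offsets %p+2 through %p+5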
diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll
new file mode 100644
index 0000000000000..52e6b61aedfe8
--- /dev/null
+++ b/test/CodeGen/X86/sse-schedule.ll
@@ -0,0 +1,2415 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+
+define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_addps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: addps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_addps:
+; ATOM: # BB#0:
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: addps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_addps:
+; SLM: # BB#0:
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_addps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_addps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_addps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fadd <4 x float> %a0, %a1
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = fadd <4 x float> %1, %2
+ ret <4 x float> %3
+}
+
+define float @test_addss(float %a0, float %a1, float *%a2) {
+; GENERIC-LABEL: test_addss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: addss %xmm1, %xmm0
+; GENERIC-NEXT: addss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_addss:
+; ATOM: # BB#0:
+; ATOM-NEXT: addss %xmm1, %xmm0
+; ATOM-NEXT: addss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_addss:
+; SLM: # BB#0:
+; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_addss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_addss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_addss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fadd float %a0, %a1
+ %2 = load float, float *%a2, align 4
+ %3 = fadd float %1, %2
+ ret float %3
+}
+
+define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_andps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: andps %xmm1, %xmm0
+; GENERIC-NEXT: andps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_andps:
+; ATOM: # BB#0:
+; ATOM-NEXT: andps %xmm1, %xmm0
+; ATOM-NEXT: andps (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_andps:
+; SLM: # BB#0:
+; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_andps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_andps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_andps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <4 x float> %a0 to <4 x i32>
+ %2 = bitcast <4 x float> %a1 to <4 x i32>
+ %3 = and <4 x i32> %1, %2
+ %4 = load <4 x float>, <4 x float> *%a2, align 16
+ %5 = bitcast <4 x float> %4 to <4 x i32>
+ %6 = and <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <4 x float>
+ ret <4 x float> %7
+}
+
+define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_andnotps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: andnps %xmm1, %xmm0
+; GENERIC-NEXT: andnps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_andnotps:
+; ATOM: # BB#0:
+; ATOM-NEXT: andnps %xmm1, %xmm0
+; ATOM-NEXT: andnps (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_andnotps:
+; SLM: # BB#0:
+; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_andnotps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_andnotps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_andnotps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <4 x float> %a0 to <4 x i32>
+ %2 = bitcast <4 x float> %a1 to <4 x i32>
+ %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %4 = and <4 x i32> %3, %2
+ %5 = load <4 x float>, <4 x float> *%a2, align 16
+ %6 = bitcast <4 x float> %5 to <4 x i32>
+ %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %8 = and <4 x i32> %6, %7
+ %9 = bitcast <4 x i32> %8 to <4 x float>
+ ret <4 x float> %9
+}
+
+define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_cmpps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cmpeqps %xmm0, %xmm1
+; GENERIC-NEXT: cmpeqps (%rdi), %xmm0
+; GENERIC-NEXT: orps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cmpps:
+; ATOM: # BB#0:
+; ATOM-NEXT: cmpeqps %xmm0, %xmm1
+; ATOM-NEXT: cmpeqps (%rdi), %xmm0
+; ATOM-NEXT: orps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cmpps:
+; SLM: # BB#0:
+; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cmpps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cmpps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cmpps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fcmp oeq <4 x float> %a0, %a1
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = fcmp oeq <4 x float> %a0, %2
+ %4 = or <4 x i1> %1, %3
+ %5 = sext <4 x i1> %4 to <4 x i32>
+ %6 = bitcast <4 x i32> %5 to <4 x float>
+ ret <4 x float> %6
+}
+
+define float @test_cmpss(float %a0, float %a1, float *%a2) {
+; GENERIC-LABEL: test_cmpss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cmpeqss %xmm1, %xmm0
+; GENERIC-NEXT: cmpeqss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cmpss:
+; ATOM: # BB#0:
+; ATOM-NEXT: cmpeqss %xmm1, %xmm0
+; ATOM-NEXT: cmpeqss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cmpss:
+; SLM: # BB#0:
+; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cmpss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cmpss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cmpss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x float> undef, float %a0, i32 0
+ %2 = insertelement <4 x float> undef, float %a1, i32 0
+ %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
+ %4 = load float, float *%a2, align 4
+ %5 = insertelement <4 x float> undef, float %4, i32 0
+ %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
+ %7 = extractelement <4 x float> %6, i32 0
+ ret float %7
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_comiss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: comiss %xmm1, %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %cl
+; GENERIC-NEXT: andb %al, %cl
+; GENERIC-NEXT: comiss (%rdi), %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %dl
+; GENERIC-NEXT: andb %al, %dl
+; GENERIC-NEXT: orb %cl, %dl
+; GENERIC-NEXT: movzbl %dl, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_comiss:
+; ATOM: # BB#0:
+; ATOM-NEXT: comiss %xmm1, %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %cl
+; ATOM-NEXT: andb %al, %cl
+; ATOM-NEXT: comiss (%rdi), %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %dl
+; ATOM-NEXT: andb %al, %dl
+; ATOM-NEXT: orb %cl, %dl
+; ATOM-NEXT: movzbl %dl, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_comiss:
+; SLM: # BB#0:
+; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %cl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
+; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %dl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
+; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
+; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_comiss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %cl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %dl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_comiss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_comiss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 4
+ %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
+ %4 = or i32 %1, %3
+ ret i32 %4
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_cvtsi2ss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1
+; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0
+; GENERIC-NEXT: addss %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsi2ss:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0
+; ATOM-NEXT: cvtsi2ssl %edi, %xmm1
+; ATOM-NEXT: addss %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsi2ss:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsi2ss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsi2ss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsi2ss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sitofp i32 %a0 to float
+ %2 = load i32, i32 *%a1, align 4
+ %3 = sitofp i32 %2 to float
+ %4 = fadd float %1, %3
+ ret float %4
+}
+
+define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_cvtsi2ssq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1
+; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0
+; GENERIC-NEXT: addss %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsi2ssq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0
+; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1
+; ATOM-NEXT: addss %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsi2ssq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsi2ssq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsi2ssq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsi2ssq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sitofp i64 %a0 to float
+ %2 = load i64, i64 *%a1, align 8
+ %3 = sitofp i64 %2 to float
+ %4 = fadd float %1, %3
+ ret float %4
+}
+
+define i32 @test_cvtss2si(float %a0, float *%a1) {
+; GENERIC-LABEL: test_cvtss2si:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtss2si %xmm0, %ecx
+; GENERIC-NEXT: cvtss2si (%rdi), %eax
+; GENERIC-NEXT: addl %ecx, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtss2si:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtss2si (%rdi), %eax
+; ATOM-NEXT: cvtss2si %xmm0, %ecx
+; ATOM-NEXT: addl %ecx, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtss2si:
+; SLM: # BB#0:
+; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
+; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
+; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtss2si:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
+; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtss2si:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtss2si:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x float> undef, float %a0, i32 0
+ %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
+ %3 = load float, float *%a1, align 4
+ %4 = insertelement <4 x float> undef, float %3, i32 0
+ %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
+ %6 = add i32 %2, %5
+ ret i32 %6
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+define i64 @test_cvtss2siq(float %a0, float *%a1) {
+; GENERIC-LABEL: test_cvtss2siq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtss2si %xmm0, %rcx
+; GENERIC-NEXT: cvtss2si (%rdi), %rax
+; GENERIC-NEXT: addq %rcx, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtss2siq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtss2si (%rdi), %rax
+; ATOM-NEXT: cvtss2si %xmm0, %rcx
+; ATOM-NEXT: addq %rcx, %rax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtss2siq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
+; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
+; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtss2siq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
+; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtss2siq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtss2siq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x float> undef, float %a0, i32 0
+ %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
+ %3 = load float, float *%a1, align 4
+ %4 = insertelement <4 x float> undef, float %3, i32 0
+ %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
+ %6 = add i64 %2, %5
+ ret i64 %6
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+define i32 @test_cvttss2si(float %a0, float *%a1) {
+; GENERIC-LABEL: test_cvttss2si:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttss2si %xmm0, %ecx
+; GENERIC-NEXT: cvttss2si (%rdi), %eax
+; GENERIC-NEXT: addl %ecx, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttss2si:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttss2si (%rdi), %eax
+; ATOM-NEXT: cvttss2si %xmm0, %ecx
+; ATOM-NEXT: addl %ecx, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttss2si:
+; SLM: # BB#0:
+; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
+; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
+; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttss2si:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
+; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttss2si:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttss2si:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi float %a0 to i32
+ %2 = load float, float *%a1, align 4
+ %3 = fptosi float %2 to i32
+ %4 = add i32 %1, %3
+ ret i32 %4
+}
+
+define i64 @test_cvttss2siq(float %a0, float *%a1) {
+; GENERIC-LABEL: test_cvttss2siq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttss2si %xmm0, %rcx
+; GENERIC-NEXT: cvttss2si (%rdi), %rax
+; GENERIC-NEXT: addq %rcx, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttss2siq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttss2si (%rdi), %rax
+; ATOM-NEXT: cvttss2si %xmm0, %rcx
+; ATOM-NEXT: addq %rcx, %rax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttss2siq:
+; SLM: # BB#0:
+; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
+; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
+; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttss2siq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
+; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttss2siq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttss2siq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi float %a0 to i64
+ %2 = load float, float *%a1, align 4
+ %3 = fptosi float %2 to i64
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
+define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_divps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: divps %xmm1, %xmm0
+; GENERIC-NEXT: divps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_divps:
+; ATOM: # BB#0:
+; ATOM-NEXT: divps %xmm1, %xmm0
+; ATOM-NEXT: divps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_divps:
+; SLM: # BB#0:
+; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
+; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_divps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_divps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_divps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fdiv <4 x float> %a0, %a1
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = fdiv <4 x float> %1, %2
+ ret <4 x float> %3
+}
+
+define float @test_divss(float %a0, float %a1, float *%a2) {
+; GENERIC-LABEL: test_divss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: divss %xmm1, %xmm0
+; GENERIC-NEXT: divss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_divss:
+; ATOM: # BB#0:
+; ATOM-NEXT: divss %xmm1, %xmm0
+; ATOM-NEXT: divss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_divss:
+; SLM: # BB#0:
+; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
+; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_divss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_divss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_divss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fdiv float %a0, %a1
+ %2 = load float, float *%a2, align 4
+ %3 = fdiv float %1, %2
+ ret float %3
+}
+
+define void @test_ldmxcsr(i32 %a0) {
+; GENERIC-LABEL: test_ldmxcsr:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_ldmxcsr:
+; ATOM: # BB#0:
+; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_ldmxcsr:
+; SLM: # BB#0:
+; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ldmxcsr:
+; SANDY: # BB#0:
+; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_ldmxcsr:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_ldmxcsr:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = alloca i32, align 4
+ %2 = bitcast i32* %1 to i8*
+ store i32 %a0, i32* %1
+ call void @llvm.x86.sse.ldmxcsr(i8* %2)
+ ret void
+}
+declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
+
+define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_maxps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: maxps %xmm1, %xmm0
+; GENERIC-NEXT: maxps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_maxps:
+; ATOM: # BB#0:
+; ATOM-NEXT: maxps %xmm1, %xmm0
+; ATOM-NEXT: maxps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_maxps:
+; SLM: # BB#0:
+; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_maxps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_maxps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_maxps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
+ ret <4 x float> %3
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_maxss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: maxss %xmm1, %xmm0
+; GENERIC-NEXT: maxss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_maxss:
+; ATOM: # BB#0:
+; ATOM-NEXT: maxss %xmm1, %xmm0
+; ATOM-NEXT: maxss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_maxss:
+; SLM: # BB#0:
+; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_maxss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_maxss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_maxss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
+ ret <4 x float> %3
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_minps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: minps %xmm1, %xmm0
+; GENERIC-NEXT: minps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_minps:
+; ATOM: # BB#0:
+; ATOM-NEXT: minps %xmm1, %xmm0
+; ATOM-NEXT: minps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_minps:
+; SLM: # BB#0:
+; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_minps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_minps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_minps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
+ ret <4 x float> %3
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_minss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: minss %xmm1, %xmm0
+; GENERIC-NEXT: minss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_minss:
+; ATOM: # BB#0:
+; ATOM-NEXT: minss %xmm1, %xmm0
+; ATOM-NEXT: minss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_minss:
+; SLM: # BB#0:
+; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_minss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_minss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_minss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
+ ret <4 x float> %3
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_movaps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movaps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm0, %xmm0
+; GENERIC-NEXT: movaps %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movaps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movaps (%rdi), %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm0
+; ATOM-NEXT: movaps %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movaps:
+; SLM: # BB#0:
+; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movaps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movaps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movaps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <4 x float>, <4 x float> *%a0, align 16
+ %2 = fadd <4 x float> %1, %1
+ store <4 x float> %2, <4 x float> *%a1, align 16
+ ret void
+}
+
+; TODO (v)movhlps
+
+define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
+; GENERIC-LABEL: test_movhlps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movhlps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movhlps:
+; SLM: # BB#0:
+; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movhlps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movhlps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movhlps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+ ret <4 x float> %1
+}
+
+; TODO (v)movhps
+
+define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+; GENERIC-LABEL: test_movhps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; GENERIC-NEXT: addps %xmm0, %xmm1
+; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; GENERIC-NEXT: movlps %xmm1, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movhps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; ATOM-NEXT: movlps %xmm1, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movhps:
+; SLM: # BB#0:
+; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
+; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movhps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movhps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movhps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast x86_mmx* %a2 to <2 x float>*
+ %2 = load <2 x float>, <2 x float> *%1, align 8
+ %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ %5 = fadd <4 x float> %a0, %4
+ %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ store <2 x float> %6, <2 x float>* %1
+ ret void
+}
+
+; TODO (v)movlhps
+
+define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
+; GENERIC-LABEL: test_movlhps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movlhps:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movlhps:
+; SLM: # BB#0:
+; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movlhps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movlhps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movlhps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ %2 = fadd <4 x float> %a1, %1
+ ret <4 x float> %2
+}
+
+define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+; GENERIC-LABEL: test_movlps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
+; GENERIC-NEXT: addps %xmm0, %xmm1
+; GENERIC-NEXT: movlps %xmm1, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movlps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movlps %xmm1, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movlps:
+; SLM: # BB#0:
+; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
+; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movlps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movlps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movlps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast x86_mmx* %a2 to <2 x float>*
+ %2 = load <2 x float>, <2 x float> *%1, align 8
+ %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %5 = fadd <4 x float> %a0, %4
+ %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ store <2 x float> %6, <2 x float>* %1
+ ret void
+}
+
+define i32 @test_movmskps(<4 x float> %a0) {
+; GENERIC-LABEL: test_movmskps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movmskps %xmm0, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movmskps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movmskps %xmm0, %eax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movmskps:
+; SLM: # BB#0:
+; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movmskps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movmskps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movmskps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+ ret i32 %1
+}
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
+
+define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_movntps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movntps %xmm0, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movntps:
+; ATOM: # BB#0:
+; ATOM-NEXT: movntps %xmm0, (%rdi)
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movntps:
+; SLM: # BB#0:
+; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movntps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movntps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movntps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
+ ret void
+}
+
+define void @test_movss_mem(float* %a0, float* %a1) {
+; GENERIC-LABEL: test_movss_mem:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; GENERIC-NEXT: addss %xmm0, %xmm0
+; GENERIC-NEXT: movss %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movss_mem:
+; ATOM: # BB#0:
+; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT: addss %xmm0, %xmm0
+; ATOM-NEXT: movss %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movss_mem:
+; SLM: # BB#0:
+; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
+; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movss_mem:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movss_mem:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movss_mem:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load float, float* %a0, align 1
+ %2 = fadd float %1, %1
+ store float %2, float *%a1, align 1
+ ret void
+}
+
+define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
+; GENERIC-LABEL: test_movss_reg:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movss_reg:
+; ATOM: # BB#0:
+; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movss_reg:
+; SLM: # BB#0:
+; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movss_reg:
+; SANDY: # BB#0:
+; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movss_reg:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movss_reg:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ ret <4 x float> %1
+}
+
+define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_movups:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movups (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm0, %xmm0
+; GENERIC-NEXT: movups %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movups:
+; ATOM: # BB#0:
+; ATOM-NEXT: movups (%rdi), %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm0
+; ATOM-NEXT: movups %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movups:
+; SLM: # BB#0:
+; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movups:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movups:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movups:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <4 x float>, <4 x float> *%a0, align 1
+ %2 = fadd <4 x float> %1, %1
+ store <4 x float> %2, <4 x float> *%a1, align 1
+ ret void
+}
+
+define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_mulps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: mulps %xmm1, %xmm0
+; GENERIC-NEXT: mulps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_mulps:
+; ATOM: # BB#0:
+; ATOM-NEXT: mulps %xmm1, %xmm0
+; ATOM-NEXT: mulps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_mulps:
+; SLM: # BB#0:
+; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
+; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_mulps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_mulps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_mulps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fmul <4 x float> %a0, %a1
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = fmul <4 x float> %1, %2
+ ret <4 x float> %3
+}
+
+define float @test_mulss(float %a0, float %a1, float *%a2) {
+; GENERIC-LABEL: test_mulss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: mulss %xmm1, %xmm0
+; GENERIC-NEXT: mulss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_mulss:
+; ATOM: # BB#0:
+; ATOM-NEXT: mulss %xmm1, %xmm0
+; ATOM-NEXT: mulss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_mulss:
+; SLM: # BB#0:
+; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
+; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_mulss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_mulss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_mulss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fmul float %a0, %a1
+ %2 = load float, float *%a2, align 4
+ %3 = fmul float %1, %2
+ ret float %3
+}
+
+define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_orps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: orps %xmm1, %xmm0
+; GENERIC-NEXT: orps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_orps:
+; ATOM: # BB#0:
+; ATOM-NEXT: orps %xmm1, %xmm0
+; ATOM-NEXT: orps (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_orps:
+; SLM: # BB#0:
+; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_orps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_orps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_orps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <4 x float> %a0 to <4 x i32>
+ %2 = bitcast <4 x float> %a1 to <4 x i32>
+ %3 = or <4 x i32> %1, %2
+ %4 = load <4 x float>, <4 x float> *%a2, align 16
+ %5 = bitcast <4 x float> %4 to <4 x i32>
+ %6 = or <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <4 x float>
+ ret <4 x float> %7
+}
+
+define void @test_prefetchnta(i8* %a0) {
+; GENERIC-LABEL: test_prefetchnta:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: prefetchnta (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_prefetchnta:
+; ATOM: # BB#0:
+; ATOM-NEXT: prefetchnta (%rdi)
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_prefetchnta:
+; SLM: # BB#0:
+; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_prefetchnta:
+; SANDY: # BB#0:
+; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_prefetchnta:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_prefetchnta:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
+ ret void
+}
+declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
+
+define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_rcpps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: rcpps %xmm0, %xmm1
+; GENERIC-NEXT: rcpps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_rcpps:
+; ATOM: # BB#0:
+; ATOM-NEXT: rcpps (%rdi), %xmm1
+; ATOM-NEXT: rcpps %xmm0, %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movaps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_rcpps:
+; SLM: # BB#0:
+; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
+; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
+; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_rcpps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_rcpps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_rcpps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+; TODO - rcpss_m
+
+define <4 x float> @test_rcpss(float %a0, float *%a1) {
+; GENERIC-LABEL: test_rcpss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: rcpss %xmm0, %xmm0
+; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; GENERIC-NEXT: rcpss %xmm1, %xmm1
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_rcpss:
+; ATOM: # BB#0:
+; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ATOM-NEXT: rcpss %xmm0, %xmm0
+; ATOM-NEXT: rcpss %xmm1, %xmm1
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_rcpss:
+; SLM: # BB#0:
+; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
+; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
+; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_rcpss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_rcpss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_rcpss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x float> undef, float %a0, i32 0
+ %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
+ %3 = load float, float *%a1, align 4
+ %4 = insertelement <4 x float> undef, float %3, i32 0
+ %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+ %6 = fadd <4 x float> %2, %5
+ ret <4 x float> %6
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_rsqrtps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: rsqrtps %xmm0, %xmm1
+; GENERIC-NEXT: rsqrtps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_rsqrtps:
+; ATOM: # BB#0:
+; ATOM-NEXT: rsqrtps (%rdi), %xmm1
+; ATOM-NEXT: rsqrtps %xmm0, %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movaps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_rsqrtps:
+; SLM: # BB#0:
+; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
+; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
+; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_rsqrtps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_rsqrtps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_rsqrtps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+; TODO - rsqrtss_m
+
+define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
+; GENERIC-LABEL: test_rsqrtss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: rsqrtss %xmm0, %xmm0
+; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; GENERIC-NEXT: rsqrtss %xmm1, %xmm1
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_rsqrtss:
+; ATOM: # BB#0:
+; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ATOM-NEXT: rsqrtss %xmm0, %xmm0
+; ATOM-NEXT: rsqrtss %xmm1, %xmm1
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_rsqrtss:
+; SLM: # BB#0:
+; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
+; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
+; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_rsqrtss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_rsqrtss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_rsqrtss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x float> undef, float %a0, i32 0
+ %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
+ %3 = load float, float *%a1, align 4
+ %4 = insertelement <4 x float> undef, float %3, i32 0
+ %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+ %6 = fadd <4 x float> %2, %5
+ ret <4 x float> %6
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
+define void @test_sfence() {
+; GENERIC-LABEL: test_sfence:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: sfence
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_sfence:
+; ATOM: # BB#0:
+; ATOM-NEXT: sfence
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_sfence:
+; SLM: # BB#0:
+; SLM-NEXT: sfence # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sfence:
+; SANDY: # BB#0:
+; SANDY-NEXT: sfence # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_sfence:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: sfence # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_sfence:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: sfence # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ call void @llvm.x86.sse.sfence()
+ ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind readnone
+
+define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
+; GENERIC-LABEL: test_shufps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_shufps:
+; ATOM: # BB#0:
+; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_shufps:
+; SLM: # BB#0:
+; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_shufps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_shufps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_shufps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
+; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
+ ret <4 x float> %3
+}
+
+define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_sqrtps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: sqrtps %xmm0, %xmm1
+; GENERIC-NEXT: sqrtps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_sqrtps:
+; ATOM: # BB#0:
+; ATOM-NEXT: sqrtps %xmm0, %xmm1
+; ATOM-NEXT: sqrtps (%rdi), %xmm0
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_sqrtps:
+; SLM: # BB#0:
+; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
+; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
+; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sqrtps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
+; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_sqrtps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
+; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_sqrtps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
+; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+; TODO - sqrtss_m
+
+define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_sqrtss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: sqrtss %xmm0, %xmm0
+; GENERIC-NEXT: movaps (%rdi), %xmm1
+; GENERIC-NEXT: sqrtss %xmm1, %xmm1
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_sqrtss:
+; ATOM: # BB#0:
+; ATOM-NEXT: movaps (%rdi), %xmm1
+; ATOM-NEXT: sqrtss %xmm0, %xmm0
+; ATOM-NEXT: sqrtss %xmm1, %xmm1
+; ATOM-NEXT: addps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_sqrtss:
+; SLM: # BB#0:
+; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
+; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
+; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sqrtss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
+; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
+; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_sqrtss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
+; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
+; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_sqrtss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define i32 @test_stmxcsr() {
+; GENERIC-LABEL: test_stmxcsr:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_stmxcsr:
+; ATOM: # BB#0:
+; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_stmxcsr:
+; SLM: # BB#0:
+; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_stmxcsr:
+; SANDY: # BB#0:
+; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_stmxcsr:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_stmxcsr:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = alloca i32, align 4
+ %2 = bitcast i32* %1 to i8*
+ call void @llvm.x86.sse.stmxcsr(i8* %2)
+ %3 = load i32, i32* %1, align 4
+ ret i32 %3
+}
+declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
+
+define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_subps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: subps %xmm1, %xmm0
+; GENERIC-NEXT: subps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_subps:
+; ATOM: # BB#0:
+; ATOM-NEXT: subps %xmm1, %xmm0
+; ATOM-NEXT: subps (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_subps:
+; SLM: # BB#0:
+; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_subps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_subps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_subps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fsub <4 x float> %a0, %a1
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = fsub <4 x float> %1, %2
+ ret <4 x float> %3
+}
+
+define float @test_subss(float %a0, float %a1, float *%a2) {
+; GENERIC-LABEL: test_subss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: subss %xmm1, %xmm0
+; GENERIC-NEXT: subss (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_subss:
+; ATOM: # BB#0:
+; ATOM-NEXT: subss %xmm1, %xmm0
+; ATOM-NEXT: subss (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_subss:
+; SLM: # BB#0:
+; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_subss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_subss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_subss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fsub float %a0, %a1
+ %2 = load float, float *%a2, align 4
+ %3 = fsub float %1, %2
+ ret float %3
+}
+
+define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_ucomiss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: ucomiss %xmm1, %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %cl
+; GENERIC-NEXT: andb %al, %cl
+; GENERIC-NEXT: ucomiss (%rdi), %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %dl
+; GENERIC-NEXT: andb %al, %dl
+; GENERIC-NEXT: orb %cl, %dl
+; GENERIC-NEXT: movzbl %dl, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_ucomiss:
+; ATOM: # BB#0:
+; ATOM-NEXT: ucomiss %xmm1, %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %cl
+; ATOM-NEXT: andb %al, %cl
+; ATOM-NEXT: ucomiss (%rdi), %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %dl
+; ATOM-NEXT: andb %al, %dl
+; ATOM-NEXT: orb %cl, %dl
+; ATOM-NEXT: movzbl %dl, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_ucomiss:
+; SLM: # BB#0:
+; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %cl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
+; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %dl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
+; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
+; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ucomiss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %cl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %dl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_ucomiss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_ucomiss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
+ %2 = load <4 x float>, <4 x float> *%a2, align 4
+ %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
+ %4 = or i32 %1, %3
+ ret i32 %4
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_unpckhps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_unpckhps:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_unpckhps:
+; SLM: # BB#0:
+; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_unpckhps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_unpckhps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_unpckhps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x float> %3
+}
+
+define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_unpcklps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_unpcklps:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_unpcklps:
+; SLM: # BB#0:
+; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_unpcklps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_unpcklps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_unpcklps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %2 = load <4 x float>, <4 x float> *%a2, align 16
+ %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x float> %3
+}
+
+define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_xorps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: xorps %xmm1, %xmm0
+; GENERIC-NEXT: xorps (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_xorps:
+; ATOM: # BB#0:
+; ATOM-NEXT: xorps %xmm1, %xmm0
+; ATOM-NEXT: xorps (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_xorps:
+; SLM: # BB#0:
+; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_xorps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_xorps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_xorps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <4 x float> %a0 to <4 x i32>
+ %2 = bitcast <4 x float> %a1 to <4 x i32>
+ %3 = xor <4 x i32> %1, %2
+ %4 = load <4 x float>, <4 x float> *%a2, align 16
+ %5 = bitcast <4 x float> %4 to <4 x i32>
+ %6 = xor <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <4 x float>
+ ret <4 x float> %7
+}
+
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll
new file mode 100644
index 0000000000000..33a4f413b6832
--- /dev/null
+++ b/test/CodeGen/X86/sse2-schedule.ll
@@ -0,0 +1,6039 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+
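+; A minimal sketch of checking a single prefix by hand (assumptions for illustration:
+; llc and FileCheck from the same build are on PATH; lit normally substitutes %s in the
+; RUN lines above with this file's path):
+;   llc < test/CodeGen/X86/sse2-schedule.ll -mtriple=x86_64-unknown-unknown \
+;       -print-schedule -mcpu=btver2 \
+;     | FileCheck test/CodeGen/X86/sse2-schedule.ll --check-prefix=CHECK --check-prefix=BTVER2
+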
+define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_addpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: addpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_addpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: addpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_addpd:
+; SLM: # BB#0:
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_addpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_addpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_addpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fadd <2 x double> %a0, %a1
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = fadd <2 x double> %1, %2
+ ret <2 x double> %3
+}
+
+define double @test_addsd(double %a0, double %a1, double *%a2) {
+; GENERIC-LABEL: test_addsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: addsd %xmm1, %xmm0
+; GENERIC-NEXT: addsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_addsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: addsd %xmm1, %xmm0
+; ATOM-NEXT: addsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_addsd:
+; SLM: # BB#0:
+; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_addsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_addsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_addsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fadd double %a0, %a1
+ %2 = load double, double *%a2, align 8
+ %3 = fadd double %1, %2
+ ret double %3
+}
+
+define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_andpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: andpd %xmm1, %xmm0
+; GENERIC-NEXT: andpd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_andpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: andpd %xmm1, %xmm0
+; ATOM-NEXT: andpd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_andpd:
+; SLM: # BB#0:
+; SLM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: andpd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_andpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_andpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_andpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <2 x double> %a0 to <4 x i32>
+ %2 = bitcast <2 x double> %a1 to <4 x i32>
+ %3 = and <4 x i32> %1, %2
+ %4 = load <2 x double>, <2 x double> *%a2, align 16
+ %5 = bitcast <2 x double> %4 to <4 x i32>
+ %6 = and <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <2 x double>
+ %8 = fadd <2 x double> %a1, %7
+ ret <2 x double> %8
+}
+
+define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_andnotpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: andnpd %xmm1, %xmm0
+; GENERIC-NEXT: andnpd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_andnotpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: andnpd %xmm1, %xmm0
+; ATOM-NEXT: andnpd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_andnotpd:
+; SLM: # BB#0:
+; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_andnotpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_andnotpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_andnotpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <2 x double> %a0 to <4 x i32>
+ %2 = bitcast <2 x double> %a1 to <4 x i32>
+ %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %4 = and <4 x i32> %3, %2
+ %5 = load <2 x double>, <2 x double> *%a2, align 16
+ %6 = bitcast <2 x double> %5 to <4 x i32>
+ %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %8 = and <4 x i32> %6, %7
+ %9 = bitcast <4 x i32> %8 to <2 x double>
+ %10 = fadd <2 x double> %a1, %9
+ ret <2 x double> %10
+}
+
+define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_cmppd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1
+; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0
+; GENERIC-NEXT: orpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cmppd:
+; ATOM: # BB#0:
+; ATOM-NEXT: cmpeqpd %xmm0, %xmm1
+; ATOM-NEXT: cmpeqpd (%rdi), %xmm0
+; ATOM-NEXT: orpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cmppd:
+; SLM: # BB#0:
+; SLM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cmppd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cmppd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cmppd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fcmp oeq <2 x double> %a0, %a1
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = fcmp oeq <2 x double> %a0, %2
+ %4 = or <2 x i1> %1, %3
+ %5 = sext <2 x i1> %4 to <2 x i64>
+ %6 = bitcast <2 x i64> %5 to <2 x double>
+ ret <2 x double> %6
+}
+
+define double @test_cmpsd(double %a0, double %a1, double *%a2) {
+; GENERIC-LABEL: test_cmpsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0
+; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cmpsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: cmpeqsd %xmm1, %xmm0
+; ATOM-NEXT: cmpeqsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cmpsd:
+; SLM: # BB#0:
+; SLM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cmpsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cmpsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cmpsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <2 x double> undef, double %a0, i32 0
+ %2 = insertelement <2 x double> undef, double %a1, i32 0
+ %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
+ %4 = load double, double *%a2, align 8
+ %5 = insertelement <2 x double> undef, double %4, i32 0
+ %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
+ %7 = extractelement <2 x double> %6, i32 0
+ ret double %7
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_comisd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: comisd %xmm1, %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %cl
+; GENERIC-NEXT: andb %al, %cl
+; GENERIC-NEXT: comisd (%rdi), %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %dl
+; GENERIC-NEXT: andb %al, %dl
+; GENERIC-NEXT: orb %cl, %dl
+; GENERIC-NEXT: movzbl %dl, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_comisd:
+; ATOM: # BB#0:
+; ATOM-NEXT: comisd %xmm1, %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %cl
+; ATOM-NEXT: andb %al, %cl
+; ATOM-NEXT: comisd (%rdi), %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %dl
+; ATOM-NEXT: andb %al, %dl
+; ATOM-NEXT: orb %cl, %dl
+; ATOM-NEXT: movzbl %dl, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_comisd:
+; SLM: # BB#0:
+; SLM-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %cl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
+; SLM-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %dl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
+; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
+; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_comisd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %cl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %dl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_comisd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_comisd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 8
+ %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2)
+ %4 = or i32 %1, %3
+ ret i32 %4
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
+; GENERIC-LABEL: test_cvtdq2pd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1
+; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtdq2pd:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtdq2pd %xmm0, %xmm1
+; ATOM-NEXT: cvtdq2pd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtdq2pd:
+; SLM: # BB#0:
+; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtdq2pd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtdq2pd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtdq2pd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %2 = sitofp <2 x i32> %1 to <2 x double>
+ %3 = load <4 x i32>, <4 x i32>*%a1, align 16
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %5 = sitofp <2 x i32> %4 to <2 x double>
+ %6 = fadd <2 x double> %2, %5
+ ret <2 x double> %6
+}
+
+define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
+; GENERIC-LABEL: test_cvtdq2ps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1
+; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtdq2ps:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1
+; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movaps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtdq2ps:
+; SLM: # BB#0:
+; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtdq2ps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtdq2ps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtdq2ps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sitofp <4 x i32> %a0 to <4 x float>
+ %2 = load <4 x i32>, <4 x i32>*%a1, align 16
+ %3 = sitofp <4 x i32> %2 to <4 x float>
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+
+define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_cvtpd2dq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1
+; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtpd2dq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtpd2dq (%rdi), %xmm1
+; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtpd2dq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtpd2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtpd2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtpd2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+ %2 = load <2 x double>, <2 x double> *%a1, align 16
+ %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2)
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_cvtpd2ps:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1
+; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0
+; GENERIC-NEXT: addps %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtpd2ps:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1
+; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0
+; ATOM-NEXT: addps %xmm0, %xmm1
+; ATOM-NEXT: movaps %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtpd2ps:
+; SLM: # BB#0:
+; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtpd2ps:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtpd2ps:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtpd2ps:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
+ %2 = load <2 x double>, <2 x double> *%a1, align 16
+ %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2)
+ %4 = fadd <4 x float> %1, %3
+ ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
+
+define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_cvtps2dq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1
+; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtps2dq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtps2dq (%rdi), %xmm1
+; ATOM-NEXT: cvtps2dq %xmm0, %xmm0
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtps2dq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtps2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtps2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtps2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2)
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_cvtps2pd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1
+; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtps2pd:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtps2pd (%rdi), %xmm1
+; ATOM-NEXT: cvtps2pd %xmm0, %xmm0
+; ATOM-NEXT: addpd %xmm0, %xmm1
+; ATOM-NEXT: movapd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtps2pd:
+; SLM: # BB#0:
+; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtps2pd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtps2pd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtps2pd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %2 = fpext <2 x float> %1 to <2 x double>
+ %3 = load <4 x float>, <4 x float> *%a1, align 16
+ %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %5 = fpext <2 x float> %4 to <2 x double>
+ %6 = fadd <2 x double> %2, %5
+ ret <2 x double> %6
+}
+
+define i32 @test_cvtsd2si(double %a0, double *%a1) {
+; GENERIC-LABEL: test_cvtsd2si:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsd2si %xmm0, %ecx
+; GENERIC-NEXT: cvtsd2si (%rdi), %eax
+; GENERIC-NEXT: addl %ecx, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsd2si:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsd2si (%rdi), %eax
+; ATOM-NEXT: cvtsd2si %xmm0, %ecx
+; ATOM-NEXT: addl %ecx, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsd2si:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00]
+; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50]
+; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsd2si:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00]
+; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsd2si:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsd2si:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <2 x double> undef, double %a0, i32 0
+ %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1)
+ %3 = load double, double *%a1, align 8
+ %4 = insertelement <2 x double> undef, double %3, i32 0
+ %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4)
+ %6 = add i32 %2, %5
+ ret i32 %6
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+define i64 @test_cvtsd2siq(double %a0, double *%a1) {
+; GENERIC-LABEL: test_cvtsd2siq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsd2si %xmm0, %rcx
+; GENERIC-NEXT: cvtsd2si (%rdi), %rax
+; GENERIC-NEXT: addq %rcx, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsd2siq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsd2si (%rdi), %rax
+; ATOM-NEXT: cvtsd2si %xmm0, %rcx
+; ATOM-NEXT: addq %rcx, %rax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsd2siq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00]
+; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50]
+; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsd2siq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00]
+; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsd2siq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsd2siq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <2 x double> undef, double %a0, i32 0
+ %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1)
+ %3 = load double, double *%a1, align 8
+ %4 = insertelement <2 x double> undef, double %3, i32 0
+ %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4)
+ %6 = add i64 %2, %5
+ ret i64 %6
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+define float @test_cvtsd2ss(double %a0, double *%a1) {
+; GENERIC-LABEL: test_cvtsd2ss:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1
+; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0
+; GENERIC-NEXT: addss %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsd2ss:
+; ATOM: # BB#0:
+; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2
+; ATOM-NEXT: xorps %xmm0, %xmm0
+; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0
+; ATOM-NEXT: addss %xmm2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsd2ss:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
+; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50]
+; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsd2ss:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsd2ss:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
+; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsd2ss:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptrunc double %a0 to float
+ %2 = load double, double *%a1, align 8
+ %3 = fptrunc double %2 to float
+ %4 = fadd float %1, %3
+ ret float %4
+}
+
+define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_cvtsi2sd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1
+; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0
+; GENERIC-NEXT: addsd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsi2sd:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0
+; ATOM-NEXT: cvtsi2sdl %edi, %xmm1
+; ATOM-NEXT: addsd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsi2sd:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsi2sd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsi2sd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsi2sd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sitofp i32 %a0 to double
+ %2 = load i32, i32 *%a1, align 8
+ %3 = sitofp i32 %2 to double
+ %4 = fadd double %1, %3
+ ret double %4
+}
+
+define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_cvtsi2sdq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1
+; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0
+; GENERIC-NEXT: addsd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtsi2sdq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0
+; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1
+; ATOM-NEXT: addsd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtsi2sdq:
+; SLM: # BB#0:
+; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtsi2sdq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtsi2sdq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtsi2sdq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sitofp i64 %a0 to double
+ %2 = load i64, i64 *%a1, align 8
+ %3 = sitofp i64 %2 to double
+ %4 = fadd double %1, %3
+ ret double %4
+}
+
+; TODO - cvtss2sd_m
+
+define double @test_cvtss2sd(float %a0, float *%a1) {
+; GENERIC-LABEL: test_cvtss2sd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1
+; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0
+; GENERIC-NEXT: addsd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvtss2sd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ATOM-NEXT: cvtss2sd %xmm0, %xmm2
+; ATOM-NEXT: xorps %xmm0, %xmm0
+; ATOM-NEXT: cvtss2sd %xmm1, %xmm0
+; ATOM-NEXT: addsd %xmm2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvtss2sd:
+; SLM: # BB#0:
+; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
+; SLM-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:0.50]
+; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvtss2sd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvtss2sd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
+; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvtss2sd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fpext float %a0 to double
+ %2 = load float, float *%a1, align 4
+ %3 = fpext float %2 to double
+ %4 = fadd double %1, %3
+ ret double %4
+}
+
+define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_cvttpd2dq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1
+; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttpd2dq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1
+; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttpd2dq:
+; SLM: # BB#0:
+; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttpd2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttpd2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttpd2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi <2 x double> %a0 to <2 x i32>
+ %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = load <2 x double>, <2 x double> *%a1, align 16
+ %4 = fptosi <2 x double> %3 to <2 x i32>
+ %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = add <4 x i32> %2, %5
+ ret <4 x i32> %6
+}
+
+define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_cvttps2dq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1
+; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttps2dq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttps2dq (%rdi), %xmm1
+; ATOM-NEXT: cvttps2dq %xmm0, %xmm0
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttps2dq:
+; SLM: # BB#0:
+; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttps2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttps2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttps2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi <4 x float> %a0 to <4 x i32>
+ %2 = load <4 x float>, <4 x float> *%a1, align 16
+ %3 = fptosi <4 x float> %2 to <4 x i32>
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define i32 @test_cvttsd2si(double %a0, double *%a1) {
+; GENERIC-LABEL: test_cvttsd2si:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttsd2si %xmm0, %ecx
+; GENERIC-NEXT: cvttsd2si (%rdi), %eax
+; GENERIC-NEXT: addl %ecx, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttsd2si:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttsd2si (%rdi), %eax
+; ATOM-NEXT: cvttsd2si %xmm0, %ecx
+; ATOM-NEXT: addl %ecx, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttsd2si:
+; SLM: # BB#0:
+; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00]
+; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50]
+; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttsd2si:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttsd2si:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttsd2si:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi double %a0 to i32
+ %2 = load double, double *%a1, align 8
+ %3 = fptosi double %2 to i32
+ %4 = add i32 %1, %3
+ ret i32 %4
+}
+
+define i64 @test_cvttsd2siq(double %a0, double *%a1) {
+; GENERIC-LABEL: test_cvttsd2siq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: cvttsd2si %xmm0, %rcx
+; GENERIC-NEXT: cvttsd2si (%rdi), %rax
+; GENERIC-NEXT: addq %rcx, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_cvttsd2siq:
+; ATOM: # BB#0:
+; ATOM-NEXT: cvttsd2si (%rdi), %rax
+; ATOM-NEXT: cvttsd2si %xmm0, %rcx
+; ATOM-NEXT: addq %rcx, %rax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_cvttsd2siq:
+; SLM: # BB#0:
+; SLM-NEXT: cvttsd2si (%rdi), %rax # sched: [7:1.00]
+; SLM-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:0.50]
+; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_cvttsd2siq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00]
+; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_cvttsd2siq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_cvttsd2siq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fptosi double %a0 to i64
+ %2 = load double, double *%a1, align 8
+ %3 = fptosi double %2 to i64
+ %4 = add i64 %1, %3
+ ret i64 %4
+}
+
+define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_divpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: divpd %xmm1, %xmm0
+; GENERIC-NEXT: divpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_divpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: divpd %xmm1, %xmm0
+; ATOM-NEXT: divpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_divpd:
+; SLM: # BB#0:
+; SLM-NEXT: divpd %xmm1, %xmm0 # sched: [34:34.00]
+; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [37:34.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_divpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_divpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_divpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fdiv <2 x double> %a0, %a1
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = fdiv <2 x double> %1, %2
+ ret <2 x double> %3
+}
+
+define double @test_divsd(double %a0, double %a1, double *%a2) {
+; GENERIC-LABEL: test_divsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: divsd %xmm1, %xmm0
+; GENERIC-NEXT: divsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_divsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: divsd %xmm1, %xmm0
+; ATOM-NEXT: divsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_divsd:
+; SLM: # BB#0:
+; SLM-NEXT: divsd %xmm1, %xmm0 # sched: [34:34.00]
+; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:34.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_divsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_divsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_divsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fdiv double %a0, %a1
+ %2 = load double, double *%a2, align 8
+ %3 = fdiv double %1, %2
+ ret double %3
+}
+
+define void @test_lfence() {
+; GENERIC-LABEL: test_lfence:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: lfence
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_lfence:
+; ATOM: # BB#0:
+; ATOM-NEXT: lfence
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_lfence:
+; SLM: # BB#0:
+; SLM-NEXT: lfence # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_lfence:
+; SANDY: # BB#0:
+; SANDY-NEXT: lfence # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_lfence:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: lfence # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_lfence:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: lfence # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ call void @llvm.x86.sse2.lfence()
+ ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind readnone
+
+define void @test_mfence() {
+; GENERIC-LABEL: test_mfence:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: mfence
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_mfence:
+; ATOM: # BB#0:
+; ATOM-NEXT: mfence
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_mfence:
+; SLM: # BB#0:
+; SLM-NEXT: mfence # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_mfence:
+; SANDY: # BB#0:
+; SANDY-NEXT: mfence # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_mfence:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: mfence # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_mfence:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: mfence # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ call void @llvm.x86.sse2.mfence()
+ ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind readnone
+
+define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
+; GENERIC-LABEL: test_maskmovdqu:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_maskmovdqu:
+; ATOM: # BB#0:
+; ATOM-NEXT: maskmovdqu %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_maskmovdqu:
+; SLM: # BB#0:
+; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_maskmovdqu:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_maskmovdqu:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [14:2.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_maskmovdqu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
+
+define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_maxpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: maxpd %xmm1, %xmm0
+; GENERIC-NEXT: maxpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_maxpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: maxpd %xmm1, %xmm0
+; ATOM-NEXT: maxpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_maxpd:
+; SLM: # BB#0:
+; SLM-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_maxpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_maxpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_maxpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2)
+ ret <2 x double> %3
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_maxsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: maxsd %xmm1, %xmm0
+; GENERIC-NEXT: maxsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_maxsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: maxsd %xmm1, %xmm0
+; ATOM-NEXT: maxsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_maxsd:
+; SLM: # BB#0:
+; SLM-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_maxsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_maxsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_maxsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
+ ret <2 x double> %3
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_minpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: minpd %xmm1, %xmm0
+; GENERIC-NEXT: minpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_minpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: minpd %xmm1, %xmm0
+; ATOM-NEXT: minpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_minpd:
+; SLM: # BB#0:
+; SLM-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_minpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_minpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_minpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2)
+ ret <2 x double> %3
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_minsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: minsd %xmm1, %xmm0
+; GENERIC-NEXT: minsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_minsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: minsd %xmm1, %xmm0
+; ATOM-NEXT: minsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_minsd:
+; SLM: # BB#0:
+; SLM-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_minsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_minsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_minsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2)
+ ret <2 x double> %3
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_movapd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movapd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm0, %xmm0
+; GENERIC-NEXT: movapd %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movapd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movapd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm0, %xmm0
+; ATOM-NEXT: movapd %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movapd:
+; SLM: # BB#0:
+; SLM-NEXT: movapd (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movapd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movapd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movapd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <2 x double>, <2 x double> *%a0, align 16
+ %2 = fadd <2 x double> %1, %1
+ store <2 x double> %2, <2 x double> *%a1, align 16
+ ret void
+}
+
+define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
+; GENERIC-LABEL: test_movdqa:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movdqa (%rdi), %xmm0
+; GENERIC-NEXT: paddq %xmm0, %xmm0
+; GENERIC-NEXT: movdqa %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movdqa:
+; ATOM: # BB#0:
+; ATOM-NEXT: movdqa (%rdi), %xmm0
+; ATOM-NEXT: paddq %xmm0, %xmm0
+; ATOM-NEXT: movdqa %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movdqa:
+; SLM: # BB#0:
+; SLM-NEXT: movdqa (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movdqa:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movdqa:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movdqa:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <2 x i64>, <2 x i64> *%a0, align 16
+ %2 = add <2 x i64> %1, %1
+ store <2 x i64> %2, <2 x i64> *%a1, align 16
+ ret void
+}
+
+define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
+; GENERIC-LABEL: test_movdqu:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movdqu (%rdi), %xmm0
+; GENERIC-NEXT: paddq %xmm0, %xmm0
+; GENERIC-NEXT: movdqu %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movdqu:
+; ATOM: # BB#0:
+; ATOM-NEXT: movdqu (%rdi), %xmm0
+; ATOM-NEXT: paddq %xmm0, %xmm0
+; ATOM-NEXT: movdqu %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movdqu:
+; SLM: # BB#0:
+; SLM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movdqu:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movdqu:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movdqu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <2 x i64>, <2 x i64> *%a0, align 1
+ %2 = add <2 x i64> %1, %1
+ store <2 x i64> %2, <2 x i64> *%a1, align 1
+ ret void
+}
+
+define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
+; GENERIC-LABEL: test_movd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movd %edi, %xmm1
+; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; GENERIC-NEXT: paddd %xmm0, %xmm1
+; GENERIC-NEXT: paddd %xmm0, %xmm2
+; GENERIC-NEXT: movd %xmm2, %eax
+; GENERIC-NEXT: movd %xmm1, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movd %xmm1, %eax
+; ATOM-NEXT: movd %edi, %xmm1
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movd %xmm1, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movd:
+; SLM: # BB#0:
+; SLM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00]
+; SLM-NEXT: movd %edi, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:0.33]
+; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovd %xmm0, %eax # sched: [1:0.33]
+; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00]
+; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:0.17]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.17]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+ %2 = load i32, i32 *%a2
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+ %4 = add <4 x i32> %a0, %1
+ %5 = add <4 x i32> %a0, %3
+ %6 = extractelement <4 x i32> %4, i32 0
+ %7 = extractelement <4 x i32> %5, i32 0
+ store i32 %6, i32* %a2
+ ret i32 %7
+}
+
+define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
+; GENERIC-LABEL: test_movd_64:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movd %rdi, %xmm1
+; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; GENERIC-NEXT: paddq %xmm0, %xmm1
+; GENERIC-NEXT: paddq %xmm0, %xmm2
+; GENERIC-NEXT: movd %xmm2, %rax
+; GENERIC-NEXT: movq %xmm1, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movd_64:
+; ATOM: # BB#0:
+; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; ATOM-NEXT: movd %rdi, %xmm2
+; ATOM-NEXT: paddq %xmm0, %xmm2
+; ATOM-NEXT: paddq %xmm0, %xmm1
+; ATOM-NEXT: movq %xmm2, (%rsi)
+; ATOM-NEXT: movd %xmm1, %rax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movd_64:
+; SLM: # BB#0:
+; SLM-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00]
+; SLM-NEXT: movd %rdi, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: movd %xmm2, %rax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movd_64:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.33]
+; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovq %xmm0, %rax # sched: [1:0.33]
+; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movd_64:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movd_64:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.17]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.17]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = insertelement <2 x i64> undef, i64 %a1, i64 0
+ %2 = load i64, i64 *%a2
+ %3 = insertelement <2 x i64> undef, i64 %2, i64 0
+ %4 = add <2 x i64> %a0, %1
+ %5 = add <2 x i64> %a0, %3
+ %6 = extractelement <2 x i64> %4, i64 0
+ %7 = extractelement <2 x i64> %5, i64 0
+ store i64 %6, i64* %a2
+ ret i64 %7
+}
+
+define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+; GENERIC-LABEL: test_movhpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; GENERIC-NEXT: addpd %xmm0, %xmm1
+; GENERIC-NEXT: movhpd %xmm1, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movhpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ATOM-NEXT: addpd %xmm0, %xmm1
+; ATOM-NEXT: movhpd %xmm1, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movhpd:
+; SLM: # BB#0:
+; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movhpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movhpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movhpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast x86_mmx* %a2 to double*
+ %2 = load double, double *%1, align 8
+ %3 = insertelement <2 x double> %a1, double %2, i32 1
+ %4 = fadd <2 x double> %a0, %3
+ %5 = extractelement <2 x double> %4, i32 1
+ store double %5, double* %1
+ ret void
+}
+
+define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+; GENERIC-LABEL: test_movlpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
+; GENERIC-NEXT: addpd %xmm0, %xmm1
+; GENERIC-NEXT: movlpd %xmm1, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movlpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
+; ATOM-NEXT: addpd %xmm0, %xmm1
+; ATOM-NEXT: movlpd %xmm1, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movlpd:
+; SLM: # BB#0:
+; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movlpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movlpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movlpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast x86_mmx* %a2 to double*
+ %2 = load double, double *%1, align 8
+ %3 = insertelement <2 x double> %a1, double %2, i32 0
+ %4 = fadd <2 x double> %a0, %3
+ %5 = extractelement <2 x double> %4, i32 0
+ store double %5, double* %1
+ ret void
+}
+
+define i32 @test_movmskpd(<2 x double> %a0) {
+; GENERIC-LABEL: test_movmskpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movmskpd %xmm0, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movmskpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movmskpd %xmm0, %eax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movmskpd:
+; SLM: # BB#0:
+; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movmskpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movmskpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movmskpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+ ret i32 %1
+}
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
+
+define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
+; GENERIC-LABEL: test_movntdqa:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddq %xmm0, %xmm0
+; GENERIC-NEXT: movntdq %xmm0, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movntdqa:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddq %xmm0, %xmm0
+; ATOM-NEXT: movntdq %xmm0, (%rdi)
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movntdqa:
+; SLM: # BB#0:
+; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movntdqa:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movntdqa:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movntdqa:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = add <2 x i64> %a0, %a0
+ store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0
+ ret void
+}
+
+define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_movntpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: addpd %xmm0, %xmm0
+; GENERIC-NEXT: movntpd %xmm0, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movntpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: addpd %xmm0, %xmm0
+; ATOM-NEXT: movntpd %xmm0, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movntpd:
+; SLM: # BB#0:
+; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movntpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movntpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movntpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fadd <2 x double> %a0, %a0
+ store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0
+ ret void
+}
+
+define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
+; GENERIC-LABEL: test_movq_mem:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: movq %xmm0, (%rdi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movq_mem:
+; ATOM: # BB#0:
+; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: movq %xmm0, (%rdi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movq_mem:
+; SLM: # BB#0:
+; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movq_mem:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movq_mem:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movq_mem:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load i64, i64* %a1, align 1
+ %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0
+ %3 = add <2 x i64> %a0, %2
+ %4 = extractelement <2 x i64> %3, i32 0
+ store i64 %4, i64 *%a1, align 1
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
+; GENERIC-LABEL: test_movq_reg:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movq_reg:
+; ATOM: # BB#0:
+; ATOM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movq_reg:
+; SLM: # BB#0:
+; SLM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movq_reg:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movq_reg:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movq_reg:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
+ %2 = add <2 x i64> %a1, %1
+ ret <2 x i64> %2
+}
+
+define void @test_movsd_mem(double* %a0, double* %a1) {
+; GENERIC-LABEL: test_movsd_mem:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; GENERIC-NEXT: addsd %xmm0, %xmm0
+; GENERIC-NEXT: movsd %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movsd_mem:
+; ATOM: # BB#0:
+; ATOM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ATOM-NEXT: addsd %xmm0, %xmm0
+; ATOM-NEXT: movsd %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movsd_mem:
+; SLM: # BB#0:
+; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
+; SLM-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movsd_mem:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movsd_mem:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movsd_mem:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load double, double* %a0, align 1
+ %2 = fadd double %1, %1
+ store double %2, double *%a1, align 1
+ ret void
+}
+
+define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
+; GENERIC-LABEL: test_movsd_reg:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; GENERIC-NEXT: movapd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movsd_reg:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; ATOM-NEXT: movapd %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movsd_reg:
+; SLM: # BB#0:
+; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movsd_reg:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movsd_reg:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movsd_reg:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %1
+}
+
+define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_movupd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movupd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm0, %xmm0
+; GENERIC-NEXT: movupd %xmm0, (%rsi)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_movupd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movupd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm0, %xmm0
+; ATOM-NEXT: movupd %xmm0, (%rsi)
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_movupd:
+; SLM: # BB#0:
+; SLM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_movupd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_movupd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_movupd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = load <2 x double>, <2 x double> *%a0, align 1
+ %2 = fadd <2 x double> %1, %1
+ store <2 x double> %2, <2 x double> *%a1, align 1
+ ret void
+}
+
+define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_mulpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: mulpd %xmm1, %xmm0
+; GENERIC-NEXT: mulpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_mulpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: mulpd %xmm1, %xmm0
+; ATOM-NEXT: mulpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_mulpd:
+; SLM: # BB#0:
+; SLM-NEXT: mulpd %xmm1, %xmm0 # sched: [5:2.00]
+; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_mulpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_mulpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_mulpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fmul <2 x double> %a0, %a1
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = fmul <2 x double> %1, %2
+ ret <2 x double> %3
+}
+
+define double @test_mulsd(double %a0, double %a1, double *%a2) {
+; GENERIC-LABEL: test_mulsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: mulsd %xmm1, %xmm0
+; GENERIC-NEXT: mulsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_mulsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: mulsd %xmm1, %xmm0
+; ATOM-NEXT: mulsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_mulsd:
+; SLM: # BB#0:
+; SLM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:2.00]
+; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_mulsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_mulsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_mulsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fmul double %a0, %a1
+ %2 = load double, double *%a2, align 8
+ %3 = fmul double %1, %2
+ ret double %3
+}
+
+define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_orpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: orpd %xmm1, %xmm0
+; GENERIC-NEXT: orpd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_orpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: orpd %xmm1, %xmm0
+; ATOM-NEXT: orpd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_orpd:
+; SLM: # BB#0:
+; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: orpd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_orpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_orpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_orpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <2 x double> %a0 to <4 x i32>
+ %2 = bitcast <2 x double> %a1 to <4 x i32>
+ %3 = or <4 x i32> %1, %2
+ %4 = load <2 x double>, <2 x double> *%a2, align 16
+ %5 = bitcast <2 x double> %4 to <4 x i32>
+ %6 = or <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <2 x double>
+ %8 = fadd <2 x double> %a1, %7
+ ret <2 x double> %8
+}
+
+define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_packssdw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: packssdw %xmm1, %xmm0
+; GENERIC-NEXT: packssdw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_packssdw:
+; ATOM: # BB#0:
+; ATOM-NEXT: packssdw %xmm1, %xmm0
+; ATOM-NEXT: packssdw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_packssdw:
+; SLM: # BB#0:
+; SLM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_packssdw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_packssdw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_packssdw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3)
+ ret <8 x i16> %4
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_packsswb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: packsswb %xmm1, %xmm0
+; GENERIC-NEXT: packsswb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_packsswb:
+; ATOM: # BB#0:
+; ATOM-NEXT: packsswb %xmm1, %xmm0
+; ATOM-NEXT: packsswb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_packsswb:
+; SLM: # BB#0:
+; SLM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_packsswb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_packsswb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_packsswb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3)
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_packuswb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: packuswb %xmm1, %xmm0
+; GENERIC-NEXT: packuswb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_packuswb:
+; ATOM: # BB#0:
+; ATOM-NEXT: packuswb %xmm1, %xmm0
+; ATOM-NEXT: packuswb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_packuswb:
+; SLM: # BB#0:
+; SLM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_packuswb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_packuswb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_packuswb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3)
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_paddb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddb %xmm1, %xmm0
+; GENERIC-NEXT: paddb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddb:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddb %xmm1, %xmm0
+; ATOM-NEXT: paddb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddb:
+; SLM: # BB#0:
+; SLM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = add <16 x i8> %a0, %a1
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = add <16 x i8> %1, %2
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_paddd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: paddd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddd:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddd %xmm1, %xmm0
+; ATOM-NEXT: paddd (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddd:
+; SLM: # BB#0:
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = add <4 x i32> %a0, %a1
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_paddq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: paddq (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddq:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: paddq (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddq:
+; SLM: # BB#0:
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = add <2 x i64> %a0, %a1
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = add <2 x i64> %1, %2
+ ret <2 x i64> %3
+}
+
+define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_paddsb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddsb %xmm1, %xmm0
+; GENERIC-NEXT: paddsb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddsb:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddsb %xmm1, %xmm0
+; ATOM-NEXT: paddsb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddsb:
+; SLM: # BB#0:
+; SLM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddsb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddsb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddsb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_paddsw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddsw %xmm1, %xmm0
+; GENERIC-NEXT: paddsw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddsw:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddsw %xmm1, %xmm0
+; ATOM-NEXT: paddsw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddsw:
+; SLM: # BB#0:
+; SLM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddsw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddsw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddsw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_paddusb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddusb %xmm1, %xmm0
+; GENERIC-NEXT: paddusb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddusb:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddusb %xmm1, %xmm0
+; ATOM-NEXT: paddusb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddusb:
+; SLM: # BB#0:
+; SLM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddusb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddusb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddusb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_paddusw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddusw %xmm1, %xmm0
+; GENERIC-NEXT: paddusw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddusw:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddusw %xmm1, %xmm0
+; ATOM-NEXT: paddusw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddusw:
+; SLM: # BB#0:
+; SLM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddusw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddusw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddusw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_paddw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: paddw %xmm1, %xmm0
+; GENERIC-NEXT: paddw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_paddw:
+; ATOM: # BB#0:
+; ATOM-NEXT: paddw %xmm1, %xmm0
+; ATOM-NEXT: paddw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_paddw:
+; SLM: # BB#0:
+; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_paddw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_paddw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_paddw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = add <8 x i16> %a0, %a1
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = add <8 x i16> %1, %2
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_pand:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pand %xmm1, %xmm0
+; GENERIC-NEXT: pand (%rdi), %xmm0
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pand:
+; ATOM: # BB#0:
+; ATOM-NEXT: pand %xmm1, %xmm0
+; ATOM-NEXT: pand (%rdi), %xmm0
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pand:
+; SLM: # BB#0:
+; SLM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pand:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pand:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pand:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = and <2 x i64> %a0, %a1
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = and <2 x i64> %1, %2
+ %4 = add <2 x i64> %3, %a1
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_pandn:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pandn %xmm1, %xmm0
+; GENERIC-NEXT: movdqa %xmm0, %xmm1
+; GENERIC-NEXT: pandn (%rdi), %xmm1
+; GENERIC-NEXT: paddq %xmm0, %xmm1
+; GENERIC-NEXT: movdqa %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pandn:
+; ATOM: # BB#0:
+; ATOM-NEXT: pandn %xmm1, %xmm0
+; ATOM-NEXT: movdqa %xmm0, %xmm1
+; ATOM-NEXT: pandn (%rdi), %xmm1
+; ATOM-NEXT: paddq %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pandn:
+; SLM: # BB#0:
+; SLM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: pandn (%rdi), %xmm1 # sched: [4:1.00]
+; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pandn:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pandn:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pandn:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = xor <2 x i64> %a0, <i64 -1, i64 -1>
+ %2 = and <2 x i64> %a1, %1
+ %3 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
+ %5 = and <2 x i64> %3, %4
+ %6 = add <2 x i64> %2, %5
+ ret <2 x i64> %6
+}
+
+define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_pavgb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pavgb %xmm1, %xmm0
+; GENERIC-NEXT: pavgb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pavgb:
+; ATOM: # BB#0:
+; ATOM-NEXT: pavgb %xmm1, %xmm0
+; ATOM-NEXT: pavgb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pavgb:
+; SLM: # BB#0:
+; SLM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pavgb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pavgb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pavgb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pavgw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pavgw %xmm1, %xmm0
+; GENERIC-NEXT: pavgw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pavgw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pavgw %xmm1, %xmm0
+; ATOM-NEXT: pavgw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pavgw:
+; SLM: # BB#0:
+; SLM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pavgw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pavgw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pavgw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_pcmpeqb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1
+; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpeqb:
+; ATOM: # BB#0:
+; ATOM-NEXT: pcmpeqb %xmm0, %xmm1
+; ATOM-NEXT: pcmpeqb (%rdi), %xmm0
+; ATOM-NEXT: por %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpeqb:
+; SLM: # BB#0:
+; SLM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpeqb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpeqb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpeqb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp eq <16 x i8> %a0, %a1
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = icmp eq <16 x i8> %a0, %2
+ %4 = or <16 x i1> %1, %3
+ %5 = sext <16 x i1> %4 to <16 x i8>
+ ret <16 x i8> %5
+}
+
+define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_pcmpeqd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1
+; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpeqd:
+; ATOM: # BB#0:
+; ATOM-NEXT: pcmpeqd %xmm0, %xmm1
+; ATOM-NEXT: pcmpeqd (%rdi), %xmm0
+; ATOM-NEXT: por %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpeqd:
+; SLM: # BB#0:
+; SLM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpeqd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpeqd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpeqd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp eq <4 x i32> %a0, %a1
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = icmp eq <4 x i32> %a0, %2
+ %4 = or <4 x i1> %1, %3
+ %5 = sext <4 x i1> %4 to <4 x i32>
+ ret <4 x i32> %5
+}
+
+define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pcmpeqw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1
+; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpeqw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pcmpeqw %xmm0, %xmm1
+; ATOM-NEXT: pcmpeqw (%rdi), %xmm0
+; ATOM-NEXT: por %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpeqw:
+; SLM: # BB#0:
+; SLM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpeqw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpeqw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpeqw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp eq <8 x i16> %a0, %a1
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = icmp eq <8 x i16> %a0, %2
+ %4 = or <8 x i1> %1, %3
+ %5 = sext <8 x i1> %4 to <8 x i16>
+ ret <8 x i16> %5
+}
+
+define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_pcmpgtb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movdqa %xmm0, %xmm2
+; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2
+; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpgtb:
+; ATOM: # BB#0:
+; ATOM-NEXT: movdqa %xmm0, %xmm2
+; ATOM-NEXT: pcmpgtb (%rdi), %xmm0
+; ATOM-NEXT: pcmpgtb %xmm1, %xmm2
+; ATOM-NEXT: por %xmm2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpgtb:
+; SLM: # BB#0:
+; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpgtb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpgtb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpgtb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp sgt <16 x i8> %a0, %a1
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = icmp sgt <16 x i8> %a0, %2
+ %4 = or <16 x i1> %1, %3
+ %5 = sext <16 x i1> %4 to <16 x i8>
+ ret <16 x i8> %5
+}
+
+define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_pcmpgtd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movdqa %xmm0, %xmm2
+; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2
+; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpgtd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movdqa %xmm0, %xmm2
+; ATOM-NEXT: pcmpeqd (%rdi), %xmm0
+; ATOM-NEXT: pcmpgtd %xmm1, %xmm2
+; ATOM-NEXT: por %xmm2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpgtd:
+; SLM: # BB#0:
+; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpgtd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpgtd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpgtd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp sgt <4 x i32> %a0, %a1
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = icmp eq <4 x i32> %a0, %2
+ %4 = or <4 x i1> %1, %3
+ %5 = sext <4 x i1> %4 to <4 x i32>
+ ret <4 x i32> %5
+}
+
+define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pcmpgtw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: movdqa %xmm0, %xmm2
+; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2
+; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0
+; GENERIC-NEXT: por %xmm2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pcmpgtw:
+; ATOM: # BB#0:
+; ATOM-NEXT: movdqa %xmm0, %xmm2
+; ATOM-NEXT: pcmpgtw (%rdi), %xmm0
+; ATOM-NEXT: pcmpgtw %xmm1, %xmm2
+; ATOM-NEXT: por %xmm2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pcmpgtw:
+; SLM: # BB#0:
+; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pcmpgtw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pcmpgtw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pcmpgtw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = icmp sgt <8 x i16> %a0, %a1
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = icmp sgt <8 x i16> %a0, %2
+ %4 = or <8 x i1> %1, %3
+ %5 = sext <8 x i1> %4 to <8 x i16>
+ ret <8 x i16> %5
+}
+
+define i16 @test_pextrw(<8 x i16> %a0) {
+; GENERIC-LABEL: test_pextrw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pextrw $6, %xmm0, %eax
+; GENERIC-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pextrw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pextrw $6, %xmm0, %eax
+; ATOM-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pextrw:
+; SLM: # BB#0:
+; SLM-NEXT: pextrw $6, %xmm0, %eax # sched: [4:1.00]
+; SLM-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pextrw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50]
+; SANDY-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pextrw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:1.00]
+; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pextrw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = extractelement <8 x i16> %a0, i32 6
+ ret i16 %1
+}
+
+define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pmaddwd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmaddwd %xmm1, %xmm0
+; GENERIC-NEXT: pmaddwd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmaddwd:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmaddwd %xmm1, %xmm0
+; ATOM-NEXT: pmaddwd (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmaddwd:
+; SLM: # BB#0:
+; SLM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmaddwd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmaddwd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmaddwd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = bitcast <4 x i32> %1 to <8 x i16>
+ %3 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3)
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pmaxsw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmaxsw %xmm1, %xmm0
+; GENERIC-NEXT: pmaxsw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmaxsw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmaxsw %xmm1, %xmm0
+; ATOM-NEXT: pmaxsw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmaxsw:
+; SLM: # BB#0:
+; SLM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmaxsw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmaxsw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmaxsw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_pmaxub:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmaxub %xmm1, %xmm0
+; GENERIC-NEXT: pmaxub (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmaxub:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmaxub %xmm1, %xmm0
+; ATOM-NEXT: pmaxub (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmaxub:
+; SLM: # BB#0:
+; SLM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmaxub:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmaxub:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmaxub:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pminsw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pminsw %xmm1, %xmm0
+; GENERIC-NEXT: pminsw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pminsw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pminsw %xmm1, %xmm0
+; ATOM-NEXT: pminsw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pminsw:
+; SLM: # BB#0:
+; SLM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pminsw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pminsw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pminsw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_pminub:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pminub %xmm1, %xmm0
+; GENERIC-NEXT: pminub (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pminub:
+; ATOM: # BB#0:
+; ATOM-NEXT: pminub %xmm1, %xmm0
+; ATOM-NEXT: pminub (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pminub:
+; SLM: # BB#0:
+; SLM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pminub:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pminub:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pminub:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define i32 @test_pmovmskb(<16 x i8> %a0) {
+; GENERIC-LABEL: test_pmovmskb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmovmskb %xmm0, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmovmskb:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmovmskb %xmm0, %eax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmovmskb:
+; SLM: # BB#0:
+; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmovmskb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmovmskb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmovmskb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+ ret i32 %1
+}
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pmulhuw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmulhuw %xmm1, %xmm0
+; GENERIC-NEXT: pmulhuw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmulhuw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmulhuw %xmm1, %xmm0
+; ATOM-NEXT: pmulhuw (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmulhuw:
+; SLM: # BB#0:
+; SLM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmulhuw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmulhuw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmulhuw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pmulhw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmulhw %xmm1, %xmm0
+; GENERIC-NEXT: pmulhw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmulhw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmulhw %xmm1, %xmm0
+; ATOM-NEXT: pmulhw (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmulhw:
+; SLM: # BB#0:
+; SLM-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmulhw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmulhw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmulhw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_pmullw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmullw %xmm1, %xmm0
+; GENERIC-NEXT: pmullw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmullw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmullw %xmm1, %xmm0
+; ATOM-NEXT: pmullw (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmullw:
+; SLM: # BB#0:
+; SLM-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmullw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmullw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmullw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = mul <8 x i16> %a0, %a1
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = mul <8 x i16> %1, %2
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_pmuludq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pmuludq %xmm1, %xmm0
+; GENERIC-NEXT: pmuludq (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pmuludq:
+; ATOM: # BB#0:
+; ATOM-NEXT: pmuludq %xmm1, %xmm0
+; ATOM-NEXT: pmuludq (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pmuludq:
+; SLM: # BB#0:
+; SLM-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pmuludq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pmuludq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pmuludq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3)
+ ret <2 x i64> %4
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_por:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: por %xmm1, %xmm0
+; GENERIC-NEXT: por (%rdi), %xmm0
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_por:
+; ATOM: # BB#0:
+; ATOM-NEXT: por %xmm1, %xmm0
+; ATOM-NEXT: por (%rdi), %xmm0
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_por:
+; SLM: # BB#0:
+; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: por (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_por:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_por:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_por:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = or <2 x i64> %a0, %a1
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = or <2 x i64> %1, %2
+ %4 = add <2 x i64> %3, %a1
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_psadbw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psadbw %xmm1, %xmm0
+; GENERIC-NEXT: psadbw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psadbw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psadbw %xmm1, %xmm0
+; ATOM-NEXT: psadbw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psadbw:
+; SLM: # BB#0:
+; SLM-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psadbw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psadbw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psadbw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = bitcast <2 x i64> %1 to <16 x i8>
+ %3 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3)
+ ret <2 x i64> %4
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
+; GENERIC-LABEL: test_pshufd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pshufd:
+; ATOM: # BB#0:
+; ATOM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0]
+; ATOM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; ATOM-NEXT: paddd %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pshufd:
+; SLM: # BB#0:
+; SLM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00]
+; SLM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
+; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pshufd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
+; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pshufd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
+; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pshufd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
+; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %2 = load <4 x i32>, <4 x i32> *%a1, align 16
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
+; GENERIC-LABEL: test_pshufhw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6]
+; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4]
+; GENERIC-NEXT: paddw %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pshufhw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4]
+; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; ATOM-NEXT: paddw %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pshufhw:
+; SLM: # BB#0:
+; SLM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00]
+; SLM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pshufhw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50]
+; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pshufhw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:1.00]
+; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pshufhw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BTVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
+ %2 = load <8 x i16>, <8 x i16> *%a1, align 16
+ %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
+ %4 = add <8 x i16> %1, %3
+ ret <8 x i16> %4
+}
+
+define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
+; GENERIC-LABEL: test_pshuflw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7]
+; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7]
+; GENERIC-NEXT: paddw %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pshuflw:
+; ATOM: # BB#0:
+; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7]
+; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; ATOM-NEXT: paddw %xmm0, %xmm1
+; ATOM-NEXT: movdqa %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pshuflw:
+; SLM: # BB#0:
+; SLM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00]
+; SLM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
+; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pshuflw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50]
+; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pshuflw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:1.00]
+; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pshuflw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BTVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
+ %2 = load <8 x i16>, <8 x i16> *%a1, align 16
+ %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+ %4 = add <8 x i16> %1, %3
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_pslld:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pslld %xmm1, %xmm0
+; GENERIC-NEXT: pslld (%rdi), %xmm0
+; GENERIC-NEXT: pslld $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pslld:
+; ATOM: # BB#0:
+; ATOM-NEXT: pslld %xmm1, %xmm0
+; ATOM-NEXT: pslld (%rdi), %xmm0
+; ATOM-NEXT: pslld $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pslld:
+; SLM: # BB#0:
+; SLM-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: pslld (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pslld:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pslld:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pslld:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2)
+ %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
+
+define <4 x i32> @test_pslldq(<4 x i32> %a0) {
+; GENERIC-LABEL: test_pslldq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pslldq:
+; ATOM: # BB#0:
+; ATOM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pslldq:
+; SLM: # BB#0:
+; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pslldq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pslldq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pslldq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_psllq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psllq %xmm1, %xmm0
+; GENERIC-NEXT: psllq (%rdi), %xmm0
+; GENERIC-NEXT: psllq $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psllq:
+; ATOM: # BB#0:
+; ATOM-NEXT: psllq %xmm1, %xmm0
+; ATOM-NEXT: psllq (%rdi), %xmm0
+; ATOM-NEXT: psllq $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psllq:
+; SLM: # BB#0:
+; SLM-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psllq (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psllq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psllq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psllq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2)
+ %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2)
+ ret <2 x i64> %4
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
+
+define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psllw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psllw %xmm1, %xmm0
+; GENERIC-NEXT: psllw (%rdi), %xmm0
+; GENERIC-NEXT: psllw $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psllw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psllw %xmm1, %xmm0
+; ATOM-NEXT: psllw (%rdi), %xmm0
+; ATOM-NEXT: psllw $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psllw:
+; SLM: # BB#0:
+; SLM-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psllw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psllw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psllw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psllw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2)
+ %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
+
+define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_psrad:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psrad %xmm1, %xmm0
+; GENERIC-NEXT: psrad (%rdi), %xmm0
+; GENERIC-NEXT: psrad $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psrad:
+; ATOM: # BB#0:
+; ATOM-NEXT: psrad %xmm1, %xmm0
+; ATOM-NEXT: psrad (%rdi), %xmm0
+; ATOM-NEXT: psrad $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psrad:
+; SLM: # BB#0:
+; SLM-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psrad (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psrad:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psrad:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psrad:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2)
+ %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
+
+define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psraw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psraw %xmm1, %xmm0
+; GENERIC-NEXT: psraw (%rdi), %xmm0
+; GENERIC-NEXT: psraw $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psraw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psraw %xmm1, %xmm0
+; ATOM-NEXT: psraw (%rdi), %xmm0
+; ATOM-NEXT: psraw $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psraw:
+; SLM: # BB#0:
+; SLM-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psraw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psraw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psraw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psraw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2)
+ %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
+
+define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_psrld:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psrld %xmm1, %xmm0
+; GENERIC-NEXT: psrld (%rdi), %xmm0
+; GENERIC-NEXT: psrld $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psrld:
+; ATOM: # BB#0:
+; ATOM-NEXT: psrld %xmm1, %xmm0
+; ATOM-NEXT: psrld (%rdi), %xmm0
+; ATOM-NEXT: psrld $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psrld:
+; SLM: # BB#0:
+; SLM-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psrld (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psrld:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psrld:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psrld:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2)
+ %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
+
+define <4 x i32> @test_psrldq(<4 x i32> %a0) {
+; GENERIC-LABEL: test_psrldq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psrldq:
+; ATOM: # BB#0:
+; ATOM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psrldq:
+; SLM: # BB#0:
+; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psrldq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psrldq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psrldq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_psrlq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psrlq %xmm1, %xmm0
+; GENERIC-NEXT: psrlq (%rdi), %xmm0
+; GENERIC-NEXT: psrlq $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psrlq:
+; ATOM: # BB#0:
+; ATOM-NEXT: psrlq %xmm1, %xmm0
+; ATOM-NEXT: psrlq (%rdi), %xmm0
+; ATOM-NEXT: psrlq $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psrlq:
+; SLM: # BB#0:
+; SLM-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psrlq (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psrlq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psrlq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psrlq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2)
+ %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2)
+ ret <2 x i64> %4
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
+
+define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psrlw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psrlw %xmm1, %xmm0
+; GENERIC-NEXT: psrlw (%rdi), %xmm0
+; GENERIC-NEXT: psrlw $2, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psrlw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psrlw %xmm1, %xmm0
+; ATOM-NEXT: psrlw (%rdi), %xmm0
+; ATOM-NEXT: psrlw $2, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psrlw:
+; SLM: # BB#0:
+; SLM-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: psrlw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psrlw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psrlw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psrlw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2)
+ %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
+
+define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_psubb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubb %xmm1, %xmm0
+; GENERIC-NEXT: psubb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubb:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubb %xmm1, %xmm0
+; ATOM-NEXT: psubb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubb:
+; SLM: # BB#0:
+; SLM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sub <16 x i8> %a0, %a1
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = sub <16 x i8> %1, %2
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_psubd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubd %xmm1, %xmm0
+; GENERIC-NEXT: psubd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubd:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubd %xmm1, %xmm0
+; ATOM-NEXT: psubd (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubd:
+; SLM: # BB#0:
+; SLM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sub <4 x i32> %a0, %a1
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = sub <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_psubq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubq %xmm1, %xmm0
+; GENERIC-NEXT: psubq (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubq:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubq %xmm1, %xmm0
+; ATOM-NEXT: psubq (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubq:
+; SLM: # BB#0:
+; SLM-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sub <2 x i64> %a0, %a1
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = sub <2 x i64> %1, %2
+ ret <2 x i64> %3
+}
+
+define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_psubsb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubsb %xmm1, %xmm0
+; GENERIC-NEXT: psubsb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubsb:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubsb %xmm1, %xmm0
+; ATOM-NEXT: psubsb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubsb:
+; SLM: # BB#0:
+; SLM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubsb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubsb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubsb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psubsw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubsw %xmm1, %xmm0
+; GENERIC-NEXT: psubsw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubsw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubsw %xmm1, %xmm0
+; ATOM-NEXT: psubsw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubsw:
+; SLM: # BB#0:
+; SLM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubsw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubsw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubsw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_psubusb:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubusb %xmm1, %xmm0
+; GENERIC-NEXT: psubusb (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubusb:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubusb %xmm1, %xmm0
+; ATOM-NEXT: psubusb (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubusb:
+; SLM: # BB#0:
+; SLM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubusb:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubusb:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubusb:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psubusw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubusw %xmm1, %xmm0
+; GENERIC-NEXT: psubusw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubusw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubusw %xmm1, %xmm0
+; ATOM-NEXT: psubusw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubusw:
+; SLM: # BB#0:
+; SLM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubusw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubusw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubusw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %1, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_psubw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: psubw %xmm1, %xmm0
+; GENERIC-NEXT: psubw (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_psubw:
+; ATOM: # BB#0:
+; ATOM-NEXT: psubw %xmm1, %xmm0
+; ATOM-NEXT: psubw (%rdi), %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_psubw:
+; SLM: # BB#0:
+; SLM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_psubw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_psubw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_psubw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = sub <8 x i16> %a0, %a1
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = sub <8 x i16> %1, %2
+ ret <8 x i16> %3
+}
+
+define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_punpckhbw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpckhbw:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpckhbw:
+; SLM: # BB#0:
+; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpckhbw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpckhbw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpckhbw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_punpckhdq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpckhdq:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ATOM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+; ATOM-NEXT: paddd %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpckhdq:
+; SLM: # BB#0:
+; SLM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SLM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpckhdq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpckhdq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpckhdq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_punpckhqdq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1]
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpckhqdq:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; ATOM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1]
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpckhqdq:
+; SLM: # BB#0:
+; SLM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SLM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpckhqdq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpckhqdq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpckhqdq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+ %4 = add <2 x i64> %1, %3
+ ret <2 x i64> %4
+}
+
+define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_punpckhwd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpckhwd:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpckhwd:
+; SLM: # BB#0:
+; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpckhwd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpckhwd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpckhwd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %3
+}
+
+define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
+; GENERIC-LABEL: test_punpcklbw:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpcklbw:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpcklbw:
+; SLM: # BB#0:
+; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpcklbw:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpcklbw:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpcklbw:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %2 = load <16 x i8>, <16 x i8> *%a2, align 16
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_punpckldq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; GENERIC-NEXT: paddd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpckldq:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; ATOM-NEXT: paddd %xmm1, %xmm0
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpckldq:
+; SLM: # BB#0:
+; SLM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SLM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
+; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpckldq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50]
+; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpckldq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpckldq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %2 = load <4 x i32>, <4 x i32> *%a2, align 16
+ %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %4 = add <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_punpcklqdq:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0]
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpcklqdq:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpcklqdq:
+; SLM: # BB#0:
+; SLM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpcklqdq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpcklqdq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpcklqdq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+ %4 = add <2 x i64> %1, %3
+ ret <2 x i64> %4
+}
+
+define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_punpcklwd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_punpcklwd:
+; ATOM: # BB#0:
+; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_punpcklwd:
+; SLM: # BB#0:
+; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_punpcklwd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_punpcklwd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_punpcklwd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %2 = load <8 x i16>, <8 x i16> *%a2, align 16
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_pxor:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: pxor %xmm1, %xmm0
+; GENERIC-NEXT: pxor (%rdi), %xmm0
+; GENERIC-NEXT: paddq %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_pxor:
+; ATOM: # BB#0:
+; ATOM-NEXT: pxor %xmm1, %xmm0
+; ATOM-NEXT: pxor (%rdi), %xmm0
+; ATOM-NEXT: paddq %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_pxor:
+; SLM: # BB#0:
+; SLM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: pxor (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_pxor:
+; SANDY: # BB#0:
+; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_pxor:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_pxor:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = xor <2 x i64> %a0, %a1
+ %2 = load <2 x i64>, <2 x i64> *%a2, align 16
+ %3 = xor <2 x i64> %1, %2
+ %4 = add <2 x i64> %3, %a1
+ ret <2 x i64> %4
+}
+
+define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_shufpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0]
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_shufpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; ATOM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0]
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_shufpd:
+; SLM: # BB#0:
+; SLM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_shufpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_shufpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_shufpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2>
+ %4 = fadd <2 x double> %1, %3
+ ret <2 x double> %4
+}
+
+define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_sqrtpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: sqrtpd %xmm0, %xmm1
+; GENERIC-NEXT: sqrtpd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_sqrtpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: sqrtpd %xmm0, %xmm1
+; ATOM-NEXT: sqrtpd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_sqrtpd:
+; SLM: # BB#0:
+; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [18:1.00]
+; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [15:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sqrtpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00]
+; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_sqrtpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00]
+; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_sqrtpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00]
+; BTVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
+ %2 = load <2 x double>, <2 x double> *%a1, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2)
+ %4 = fadd <2 x double> %1, %3
+ ret <2 x double> %4
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+; TODO - sqrtsd_m
+
+define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_sqrtsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: sqrtsd %xmm0, %xmm0
+; GENERIC-NEXT: movapd (%rdi), %xmm1
+; GENERIC-NEXT: sqrtsd %xmm1, %xmm1
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_sqrtsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: movapd (%rdi), %xmm1
+; ATOM-NEXT: sqrtsd %xmm0, %xmm0
+; ATOM-NEXT: sqrtsd %xmm1, %xmm1
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_sqrtsd:
+; SLM: # BB#0:
+; SLM-NEXT: movapd (%rdi), %xmm1 # sched: [3:1.00]
+; SLM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00]
+; SLM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sqrtsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
+; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50]
+; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_sqrtsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
+; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50]
+; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_sqrtsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
+ %2 = load <2 x double>, <2 x double> *%a1, align 16
+ %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+ %4 = fadd <2 x double> %1, %3
+ ret <2 x double> %4
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_subpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: subpd %xmm1, %xmm0
+; GENERIC-NEXT: subpd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_subpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: subpd %xmm1, %xmm0
+; ATOM-NEXT: subpd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_subpd:
+; SLM: # BB#0:
+; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_subpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_subpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_subpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fsub <2 x double> %a0, %a1
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = fsub <2 x double> %1, %2
+ ret <2 x double> %3
+}
+
+define double @test_subsd(double %a0, double %a1, double *%a2) {
+; GENERIC-LABEL: test_subsd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: subsd %xmm1, %xmm0
+; GENERIC-NEXT: subsd (%rdi), %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_subsd:
+; ATOM: # BB#0:
+; ATOM-NEXT: subsd %xmm1, %xmm0
+; ATOM-NEXT: subsd (%rdi), %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_subsd:
+; SLM: # BB#0:
+; SLM-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_subsd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_subsd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_subsd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = fsub double %a0, %a1
+ %2 = load double, double *%a2, align 8
+ %3 = fsub double %1, %2
+ ret double %3
+}
+
+define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_ucomisd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: ucomisd %xmm1, %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %cl
+; GENERIC-NEXT: andb %al, %cl
+; GENERIC-NEXT: ucomisd (%rdi), %xmm0
+; GENERIC-NEXT: setnp %al
+; GENERIC-NEXT: sete %dl
+; GENERIC-NEXT: andb %al, %dl
+; GENERIC-NEXT: orb %cl, %dl
+; GENERIC-NEXT: movzbl %dl, %eax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_ucomisd:
+; ATOM: # BB#0:
+; ATOM-NEXT: ucomisd %xmm1, %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %cl
+; ATOM-NEXT: andb %al, %cl
+; ATOM-NEXT: ucomisd (%rdi), %xmm0
+; ATOM-NEXT: setnp %al
+; ATOM-NEXT: sete %dl
+; ATOM-NEXT: andb %al, %dl
+; ATOM-NEXT: orb %cl, %dl
+; ATOM-NEXT: movzbl %dl, %eax
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_ucomisd:
+; SLM: # BB#0:
+; SLM-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %cl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
+; SLM-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00]
+; SLM-NEXT: setnp %al # sched: [1:0.50]
+; SLM-NEXT: sete %dl # sched: [1:0.50]
+; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
+; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
+; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ucomisd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %cl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-NEXT: setnp %al # sched: [1:0.33]
+; SANDY-NEXT: sete %dl # sched: [1:0.33]
+; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_ucomisd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_ucomisd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
+ %2 = load <2 x double>, <2 x double> *%a2, align 8
+ %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2)
+ %4 = or i32 %1, %3
+ ret i32 %4
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_unpckhpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1]
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_unpckhpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1]
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_unpckhpd:
+; SLM: # BB#0:
+; SLM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_unpckhpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_unpckhpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_unpckhpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3>
+ %4 = fadd <2 x double> %1, %3
+ ret <2 x double> %4
+}
+
+define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_unpcklpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; GENERIC-NEXT: movapd %xmm0, %xmm1
+; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; GENERIC-NEXT: addpd %xmm0, %xmm1
+; GENERIC-NEXT: movapd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_unpcklpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; ATOM-NEXT: movapd %xmm0, %xmm1
+; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; ATOM-NEXT: addpd %xmm0, %xmm1
+; ATOM-NEXT: movapd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_unpcklpd:
+; SLM: # BB#0:
+; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
+; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_unpcklpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_unpcklpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_unpcklpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
+ %2 = load <2 x double>, <2 x double> *%a2, align 16
+ %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2>
+ %4 = fadd <2 x double> %1, %3
+ ret <2 x double> %4
+}
+
+define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_xorpd:
+; GENERIC: # BB#0:
+; GENERIC-NEXT: xorpd %xmm1, %xmm0
+; GENERIC-NEXT: xorpd (%rdi), %xmm0
+; GENERIC-NEXT: addpd %xmm1, %xmm0
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test_xorpd:
+; ATOM: # BB#0:
+; ATOM-NEXT: xorpd %xmm1, %xmm0
+; ATOM-NEXT: xorpd (%rdi), %xmm0
+; ATOM-NEXT: addpd %xmm1, %xmm0
+; ATOM-NEXT: retq
+;
+; SLM-LABEL: test_xorpd:
+; SLM: # BB#0:
+; SLM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
+; SLM-NEXT: xorpd (%rdi), %xmm0 # sched: [4:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_xorpd:
+; SANDY: # BB#0:
+; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: test_xorpd:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; BTVER2-LABEL: test_xorpd:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+ %1 = bitcast <2 x double> %a0 to <4 x i32>
+ %2 = bitcast <2 x double> %a1 to <4 x i32>
+ %3 = xor <4 x i32> %1, %2
+ %4 = load <2 x double>, <2 x double> *%a2, align 16
+ %5 = bitcast <2 x double> %4 to <4 x i32>
+ %6 = xor <4 x i32> %3, %5
+ %7 = bitcast <4 x i32> %6 to <2 x double>
+ %8 = fadd <2 x double> %a1, %7
+ ret <2 x double> %8
+}
+
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/tail-merge-after-mbp.ll b/test/CodeGen/X86/tail-merge-after-mbp.ll
deleted file mode 100644
index dc5f3a12bd91f..0000000000000
--- a/test/CodeGen/X86/tail-merge-after-mbp.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux -o - %s | FileCheck %s
-
-%0 = type { %1, %3* }
-%1 = type { %2* }
-%2 = type { %2*, i8* }
-%3 = type { i32, i32 (i32, i32)* }
-
-
-declare i32 @Up(...)
-declare i32 @f(i32, i32)
-
-; check loop block_14 is not merged with block_21
-; check loop block_11 is not merged with block_18, block_25
-define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) {
-; CHECK-LABEL: foo:
-; CHECK: # %block_11
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je
-; CHECK-NEXT:# %block_14
-; CHECK-NEXT: cmpq $0, 8(%rax)
-; CHECK-NEXT: jne
-; CHECK-NEXT:# %block_18
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je
-; CHECK-NEXT:# %block_21
-; CHECK-NEXT:# =>This Inner Loop Header
-; CHECK-NEXT: cmpq $0, 8(%rax)
-; CHECK-NEXT: jne
-; CHECK-NEXT:# %block_25
-; CHECK-NEXT:# in Loop
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jne
- br i1 %c, label %block_34, label %block_3
-
-block_3: ; preds = %2
- br i1 %c, label %block_7, label %block_4
-
-block_4: ; preds = %block_3
- %a5 = tail call i32 @f(i32 undef, i32 undef)
- %a6 = icmp eq i32 %a5, 0
- br i1 %a6, label %block_7, label %block_34
-
-block_7: ; preds = %block_4, %block_3
- %a8 = icmp eq %2* null, null
- br i1 %a8, label %block_34, label %block_9
-
-block_9: ; preds = %block_7
- %a10 = icmp eq i8* %p1, null
- br i1 %a10, label %block_11, label %block_32
-
-block_11: ; preds = %block_9
- %a12 = load %2*, %2** %p2, align 8
- %a13 = icmp eq %2* %a12, null
- br i1 %a13, label %block_34, label %block_14
-
-block_14: ; preds = %block_11
- %a15 = getelementptr inbounds %2, %2* %a12, i64 0, i32 1
- %a16 = load i8*, i8** %a15, align 8
- %a17 = icmp eq i8* %a16, null
- br i1 %a17, label %block_18, label %block_32
-
-block_18: ; preds = %block_14
- %a19 = load %2*, %2** %p2, align 8
- %a20 = icmp eq %2* %a19, null
- br i1 %a20, label %block_34, label %block_21
-
-block_21: ; preds = %block_18
- %a22 = getelementptr inbounds %2, %2* %a19, i64 0, i32 1
- %a23 = load i8*, i8** %a22, align 8
- %a24 = icmp eq i8* %a23, null
- br i1 %a24, label %block_25, label %block_32
-
-block_25: ; preds = %block_28, %block_21
- %a26 = load %2*, %2** %p2, align 8
- %a27 = icmp eq %2* %a26, null
- br i1 %a27, label %block_34, label %block_28
-
-block_28: ; preds = %block_25
- %a29 = getelementptr inbounds %2, %2* %a26, i64 0, i32 1
- %a30 = load i8*, i8** %a29, align 8
- %a31 = icmp eq i8* %a30, null
- br i1 %a31, label %block_25, label %block_32
-
-block_32: ; preds = %block_28, %block_21, %block_14, %block_9
- %a33 = tail call i32 (...) @Up()
- br label %block_34
-
-block_34: ; preds = %block_32, %block_25, %block_18, %block_11, %block_7, %block_4, %2
- %a35 = phi i32 [ 0, %2 ], [ %a5, %block_4 ], [ 0, %block_7 ], [ 0, %block_11 ], [ 0, %block_32 ], [ 0, %block_18 ], [ 0, %block_25 ]
- ret i32 %a35
-}
diff --git a/test/CodeGen/X86/tail-merge-after-mbp.mir b/test/CodeGen/X86/tail-merge-after-mbp.mir
new file mode 100644
index 0000000000000..d1dc65336948a
--- /dev/null
+++ b/test/CodeGen/X86/tail-merge-after-mbp.mir
@@ -0,0 +1,105 @@
+# RUN: llc -mtriple=x86_64-linux -run-pass=block-placement -o - %s | FileCheck %s
+
+---
+# check loop bb.7 is not merged with bb.10, bb.13
+# check loop bb.9 is not merged with bb.12
+# CHECK: bb.2:
+# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.3(0x50000000)
+# CHECK: %rax = MOV64rm %r14, 1, _, 0, _
+# CHECK-NEXT: TEST64rr %rax, %rax
+# CHECK-NEXT: JE_1 %bb.9
+# CHECK: bb.3:
+# CHECK-NEXT: successors: %bb.4(0x30000000), %bb.8(0x50000000)
+# CHECK: CMP64mi8 killed %rax, 1, _, 8, _, 0
+# CHECK-NEXT: JNE_1 %bb.8
+# CHECK: bb.4:
+# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.5(0x50000000)
+# CHECK: %rax = MOV64rm %r14, 1, _, 0, _
+# CHECK-NEXT: TEST64rr %rax, %rax
+# CHECK-NEXT: JE_1 %bb.9
+# CHECK: bb.5:
+# CHECK-NEXT: successors: %bb.6(0x71555555), %bb.8(0x0eaaaaab)
+# CHECK: CMP64mi8 killed %rax, 1, _, 8, _, 0
+# CHECK-NEXT: JNE_1 %bb.8
+# CHECK: bb.6:
+# CHECK-NEXT: successors: %bb.9(0x04000000), %bb.5(0x7c000000)
+# CHECK: %rax = MOV64rm %r14, 1, _, 0, _
+# CHECK-NEXT: TEST64rr %rax, %rax
+# CHECK-NEXT: JNE_1 %bb.5
+
+name: foo
+body: |
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.7(0x40000000)
+
+ TEST8ri %dl, 1, implicit-def %eflags, implicit killed %edx
+ JE_1 %bb.7, implicit %eflags
+
+ bb.1:
+ successors: %bb.16(0x80000000)
+
+ %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+ JMP_1 %bb.16
+
+ bb.7:
+ successors: %bb.8(0x30000000), %bb.9(0x50000000)
+
+ %rax = MOV64rm %r14, 1, _, 0, _ :: (load 8)
+ TEST64rr %rax, %rax, implicit-def %eflags
+ JNE_1 %bb.9, implicit killed %eflags
+
+ bb.8:
+ successors: %bb.16(0x80000000)
+
+ %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+ JMP_1 %bb.16
+
+ bb.9:
+ successors: %bb.10(0x30000000), %bb.15(0x50000000)
+
+ CMP64mi8 killed %rax, 1, _, 8, _, 0, implicit-def %eflags :: (load 8)
+ JNE_1 %bb.15, implicit %eflags
+
+ bb.10:
+ successors: %bb.11(0x30000000), %bb.12(0x50000000)
+
+ %rax = MOV64rm %r14, 1, _, 0, _ :: (load 8)
+ TEST64rr %rax, %rax, implicit-def %eflags
+ JNE_1 %bb.12, implicit %eflags
+
+ bb.11:
+ successors: %bb.16(0x80000000)
+
+ %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+ JMP_1 %bb.16
+
+ bb.12:
+ successors: %bb.13(0x71555555), %bb.15(0x0eaaaaab)
+
+ CMP64mi8 killed %rax, 1, _, 8, _, 0, implicit-def %eflags :: (load 8), (load 8)
+ JNE_1 %bb.15, implicit %eflags
+
+ bb.13:
+ successors: %bb.14(0x04000000), %bb.12(0x7c000000)
+
+ %rax = MOV64rm %r14, 1, _, 0, _ :: (load 8)
+ TEST64rr %rax, %rax, implicit-def %eflags
+ JNE_1 %bb.12, implicit %eflags
+
+ bb.14:
+ successors: %bb.16(0x80000000)
+
+ %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+ JMP_1 %bb.16
+
+ bb.15:
+ successors: %bb.16(0x80000000)
+
+ %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+ dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
+
+ bb.16:
+
+ RETQ %eax
+
+...
diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll
index 5eb1a55881e57..852c1f4d3d981 100644
--- a/test/CodeGen/X86/vector-rotate-128.ll
+++ b/test/CodeGen/X86/vector-rotate-128.ll
@@ -1534,31 +1534,20 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: splatconstant_rotate_mask_v2i64:
; SSE: # BB#0:
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psllq $15, %xmm1
; SSE-NEXT: psrlq $49, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_rotate_mask_v2i64:
; AVX: # BB#0:
-; AVX-NEXT: vpsllq $15, %xmm0, %xmm1
; AVX-NEXT: vpsrlq $49, %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: splatconstant_rotate_mask_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsllq $15, %xmm0, %xmm1
; AVX512-NEXT: vpsrlq $49, %xmm0, %xmm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_mask_v2i64:
diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll
index 3306cd400c1d0..14215e486bf9e 100644
--- a/test/CodeGen/X86/vector-rotate-256.ll
+++ b/test/CodeGen/X86/vector-rotate-256.ll
@@ -1014,34 +1014,23 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_rotate_mask_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vpsllq $15, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsllq $15, %xmm2, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlq $49, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_rotate_mask_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpsllq $15, %ymm0, %ymm1
; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_rotate_mask_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsllq $15, %ymm0, %ymm1
; AVX512-NEXT: vpsrlq $49, %ymm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
diff --git a/test/CodeGen/X86/x86-16.ll b/test/CodeGen/X86/x86-16.ll
index 775b2c447bbdb..55b53a8047c51 100644
--- a/test/CodeGen/X86/x86-16.ll
+++ b/test/CodeGen/X86/x86-16.ll
@@ -12,9 +12,16 @@ define i32 @main() #0 {
; CHECK: .code16
; CHECK-LABEL: main
+define i64 @foo(i32 %index) #0 {
+ %asm = tail call i64 asm "rdmsr", "=A,{cx},~{dirflag},~{fpsr},~{flags}"(i32 %index)
+ ret i64 %asm
+}
+
+; CHECK-LABEL: foo
+; CHECK: rdmsr
attributes #0 = { nounwind }
!llvm.ident = !{!0}
-!0 = !{!"clang version 3.9.0 (trunk 265439) (llvm/trunk 265567)"} \ No newline at end of file
+!0 = !{!"clang version 3.9.0 (trunk 265439) (llvm/trunk 265567)"}
diff --git a/test/DebugInfo/AMDGPU/code-pointer-size.ll b/test/DebugInfo/AMDGPU/code-pointer-size.ll
new file mode 100644
index 0000000000000..9b2b0da945e68
--- /dev/null
+++ b/test/DebugInfo/AMDGPU/code-pointer-size.ll
@@ -0,0 +1,73 @@
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; LLVM IR generated with the following command and OpenCL source:
+;
+; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
+;
+; kernel void kernel1(global int *A) {
+; *A = 11;
+; }
+;
+; kernel void kernel2(global int *B) {
+; *B = 12;
+; }
+
+; Make sure that code pointer size is 8 bytes:
+; CHECK: .debug_info contents:
+; CHECK: addr_size = 0x08
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
+entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
+ ret void, !dbg !21
+}
+
+define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
+entry:
+ %B.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
+ ret void, !dbg !27
+}
+
+!llvm.dbg.cu = !{!0}
+!opencl.ocl.version = !{!3, !3}
+!llvm.module.flags = !{!4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "dwarfdump-relocs.cl", directory: "/some/random/directory")
+!2 = !{}
+!3 = !{i32 2, i32 0}
+!4 = !{i32 2, !"Dwarf Version", i32 2}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{!""}
+!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{i32 1}
+!13 = !{!"none"}
+!14 = !{!"int*"}
+!15 = !{!""}
+!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
+!18 = !DILocation(line: 1, column: 33, scope: !7)
+!19 = !DILocation(line: 2, column: 4, scope: !7)
+!20 = !DILocation(line: 2, column: 6, scope: !7)
+!21 = !DILocation(line: 3, column: 1, scope: !7)
+!22 = distinct !DISubprogram(name: "kernel2", scope: !1, file: !1, line: 5, type: !8, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!23 = !DILocalVariable(name: "B", arg: 1, scope: !22, file: !1, line: 5, type: !10)
+!24 = !DILocation(line: 5, column: 33, scope: !22)
+!25 = !DILocation(line: 6, column: 4, scope: !22)
+!26 = !DILocation(line: 6, column: 6, scope: !22)
+!27 = !DILocation(line: 7, column: 1, scope: !22)
diff --git a/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll b/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
new file mode 100644
index 0000000000000..9c7e205aa2d08
--- /dev/null
+++ b/test/DebugInfo/AMDGPU/dwarfdump-relocs.ll
@@ -0,0 +1,72 @@
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; LLVM IR generated with the following command and OpenCL source:
+;
+; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
+;
+; kernel void kernel1(global int *A) {
+; *A = 11;
+; }
+;
+; kernel void kernel2(global int *B) {
+; *B = 12;
+; }
+
+; CHECK-NOT: failed to compute relocation
+; CHECK: file_names[ 1] 0 0x00000000 0x00000000 dwarfdump-relocs.cl
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+define amdgpu_kernel void @kernel1(i32 addrspace(1)* %A) !dbg !7 {
+entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !16, metadata !17), !dbg !18
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !19
+ store i32 11, i32 addrspace(1)* %0, align 4, !dbg !20
+ ret void, !dbg !21
+}
+
+define amdgpu_kernel void @kernel2(i32 addrspace(1)* %B) !dbg !22 {
+entry:
+ %B.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !23, metadata !17), !dbg !24
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !25
+ store i32 12, i32 addrspace(1)* %0, align 4, !dbg !26
+ ret void, !dbg !27
+}
+
+!llvm.dbg.cu = !{!0}
+!opencl.ocl.version = !{!3, !3}
+!llvm.module.flags = !{!4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "dwarfdump-relocs.cl", directory: "/some/random/directory")
+!2 = !{}
+!3 = !{i32 2, i32 0}
+!4 = !{i32 2, !"Dwarf Version", i32 2}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{!""}
+!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{i32 1}
+!13 = !{!"none"}
+!14 = !{!"int*"}
+!15 = !{!""}
+!16 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!17 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
+!18 = !DILocation(line: 1, column: 33, scope: !7)
+!19 = !DILocation(line: 2, column: 4, scope: !7)
+!20 = !DILocation(line: 2, column: 6, scope: !7)
+!21 = !DILocation(line: 3, column: 1, scope: !7)
+!22 = distinct !DISubprogram(name: "kernel2", scope: !1, file: !1, line: 5, type: !8, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!23 = !DILocalVariable(name: "B", arg: 1, scope: !22, file: !1, line: 5, type: !10)
+!24 = !DILocation(line: 5, column: 33, scope: !22)
+!25 = !DILocation(line: 6, column: 4, scope: !22)
+!26 = !DILocation(line: 6, column: 6, scope: !22)
+!27 = !DILocation(line: 7, column: 1, scope: !22)
diff --git a/test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll b/test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll
deleted file mode 100644
index cbd5e7688a5a2..0000000000000
--- a/test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; LLVM IR generated with the following command and OpenCL source:
-;
-; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
-;
-; kernel void kernel1() {
-; global int *FuncVar0 = 0;
-; constant int *FuncVar1 = 0;
-; local int *FuncVar2 = 0;
-; private int *FuncVar3 = 0;
-; int *FuncVar4 = 0;
-; }
-
-; DW_AT_address_class is available since Dwarf Version 2.
-; CHECK-NOT: DW_AT_address_class
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-define amdgpu_kernel void @kernel1() #0 !dbg !7 {
-entry:
- %FuncVar0 = alloca i32 addrspace(1)*, align 4
- %FuncVar1 = alloca i32 addrspace(2)*, align 4
- %FuncVar2 = alloca i32 addrspace(3)*, align 4
- %FuncVar3 = alloca i32*, align 4
- %FuncVar4 = alloca i32 addrspace(4)*, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %FuncVar0, metadata !10, metadata !13), !dbg !14
- store i32 addrspace(1)* null, i32 addrspace(1)** %FuncVar0, align 4, !dbg !14
- call void @llvm.dbg.declare(metadata i32 addrspace(2)** %FuncVar1, metadata !15, metadata !13), !dbg !16
- store i32 addrspace(2)* null, i32 addrspace(2)** %FuncVar1, align 4, !dbg !16
- call void @llvm.dbg.declare(metadata i32 addrspace(3)** %FuncVar2, metadata !17, metadata !13), !dbg !19
- store i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)** %FuncVar2, align 4, !dbg !19
- call void @llvm.dbg.declare(metadata i32** %FuncVar3, metadata !20, metadata !13), !dbg !22
- store i32* addrspacecast (i32 addrspace(4)* null to i32*), i32** %FuncVar3, align 4, !dbg !22
- call void @llvm.dbg.declare(metadata i32 addrspace(4)** %FuncVar4, metadata !23, metadata !13), !dbg !24
- store i32 addrspace(4)* null, i32 addrspace(4)** %FuncVar4, align 4, !dbg !24
- ret void, !dbg !25
-}
-
-!llvm.dbg.cu = !{!0}
-!opencl.ocl.version = !{!3}
-!llvm.module.flags = !{!4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "pointer-address-space-dwarf-v1.cl", directory: "/some/random/directory")
-!2 = !{}
-!3 = !{i32 2, i32 0}
-!4 = !{i32 2, !"Dwarf Version", i32 1}
-!5 = !{i32 2, !"Debug Info Version", i32 3}
-!6 = !{!""}
-!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null}
-!10 = !DILocalVariable(name: "FuncVar0", scope: !7, file: !1, line: 2, type: !11)
-!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
-!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!13 = !DIExpression()
-!14 = !DILocation(line: 2, column: 15, scope: !7)
-!15 = !DILocalVariable(name: "FuncVar1", scope: !7, file: !1, line: 3, type: !11)
-!16 = !DILocation(line: 3, column: 17, scope: !7)
-!17 = !DILocalVariable(name: "FuncVar2", scope: !7, file: !1, line: 4, type: !18)
-!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 2)
-!19 = !DILocation(line: 4, column: 14, scope: !7)
-!20 = !DILocalVariable(name: "FuncVar3", scope: !7, file: !1, line: 5, type: !21)
-!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 1)
-!22 = !DILocation(line: 5, column: 16, scope: !7)
-!23 = !DILocalVariable(name: "FuncVar4", scope: !7, file: !1, line: 6, type: !11)
-!24 = !DILocation(line: 6, column: 8, scope: !7)
-!25 = !DILocation(line: 7, column: 1, scope: !7)
diff --git a/test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll b/test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll
deleted file mode 100644
index d04a8eb74656d..0000000000000
--- a/test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; LLVM IR generated with the following command and OpenCL source:
-;
-; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
-;
-; global int GlobA;
-; global int GlobB;
-;
-; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
-; ArgA[ArgN] += ArgB[ArgN];
-; }
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata)
-
-; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
-@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0
-; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
-@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6
-
-define amdgpu_kernel void @kernel1(
-; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 04 10 01 16 18 )
- i32 %ArgN,
-; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 01 16 18 )
- i32 addrspace(1)* %ArgA,
-; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 10 10 01 16 18 )
- i32 addrspace(1)* %ArgB) !dbg !13 {
-entry:
- %ArgN.addr = alloca i32, align 4
- %ArgA.addr = alloca i32 addrspace(1)*, align 4
- %ArgB.addr = alloca i32 addrspace(1)*, align 4
- store i32 %ArgN, i32* %ArgN.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24
- store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26
- store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4
- call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29
- %1 = load i32, i32* %ArgN.addr, align 4, !dbg !30
- %idxprom = zext i32 %1 to i64, !dbg !29
- %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29
- %2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29
- %3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31
- %4 = load i32, i32* %ArgN.addr, align 4, !dbg !32
- %idxprom1 = zext i32 %4 to i64, !dbg !31
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31
- %5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
- %add = add nsw i32 %5, %2, !dbg !33
- store i32 %add, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
- ret void, !dbg !34
-}
-
-!llvm.dbg.cu = !{!2}
-!opencl.ocl.version = !{!9}
-!llvm.module.flags = !{!10, !11}
-!llvm.ident = !{!12}
-
-!0 = !DIGlobalVariableExpression(var: !1)
-!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true)
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
-!3 = !DIFile(filename: "variable-locations-dwarf-v1.cl", directory: "/some/random/directory")
-!4 = !{}
-!5 = !{!0, !6}
-!6 = !DIGlobalVariableExpression(var: !7)
-!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
-!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!9 = !{i32 2, i32 0}
-!10 = !{i32 2, !"Dwarf Version", i32 1}
-!11 = !{i32 2, !"Debug Info Version", i32 3}
-!12 = !{!"clang version 5.0.0"}
-!13 = distinct !DISubprogram(name: "kernel1", scope: !3, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4)
-!14 = !DISubroutineType(types: !15)
-!15 = !{null, !16, !17, !17}
-!16 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
-!18 = !{i32 0, i32 1, i32 1}
-!19 = !{!"none", !"none", !"none"}
-!20 = !{!"uint", !"int*", !"int*"}
-!21 = !{!"", !"", !""}
-!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16)
-!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
-!24 = !DILocation(line: 4, column: 34, scope: !13)
-!25 = !DILocalVariable(name: "ArgA", arg: 2, scope: !13, file: !3, line: 4, type: !17)
-!26 = !DILocation(line: 4, column: 52, scope: !13)
-!27 = !DILocalVariable(name: "ArgB", arg: 3, scope: !13, file: !3, line: 4, type: !17)
-!28 = !DILocation(line: 4, column: 70, scope: !13)
-!29 = !DILocation(line: 5, column: 17, scope: !13)
-!30 = !DILocation(line: 5, column: 22, scope: !13)
-!31 = !DILocation(line: 5, column: 3, scope: !13)
-!32 = !DILocation(line: 5, column: 8, scope: !13)
-!33 = !DILocation(line: 5, column: 14, scope: !13)
-!34 = !DILocation(line: 6, column: 1, scope: !13)
diff --git a/test/DebugInfo/AMDGPU/variable-locations.ll b/test/DebugInfo/AMDGPU/variable-locations.ll
index 1aab40f946c6a..93a0f26d1f1dd 100644
--- a/test/DebugInfo/AMDGPU/variable-locations.ll
+++ b/test/DebugInfo/AMDGPU/variable-locations.ll
@@ -19,7 +19,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
; CHECK-NEXT: DW_AT_external
; CHECK-NEXT: DW_AT_decl_file
; CHECK-NEXT: DW_AT_decl_line
-; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 00 00 00 00 )
@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0
; CHECK: {{.*}}DW_TAG_variable
@@ -28,7 +28,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
; CHECK-NEXT: DW_AT_external
; CHECK-NEXT: DW_AT_decl_file
; CHECK-NEXT: DW_AT_decl_line
-; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 00 00 00 00 )
@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6
define amdgpu_kernel void @kernel1(
diff --git a/test/DebugInfo/ARM/selectiondag-deadcode.ll b/test/DebugInfo/ARM/selectiondag-deadcode.ll
index fe5e87658ddee..d4d0207bf07db 100644
--- a/test/DebugInfo/ARM/selectiondag-deadcode.ll
+++ b/test/DebugInfo/ARM/selectiondag-deadcode.ll
@@ -13,7 +13,7 @@ _ZN7Vector39NormalizeEv.exit: ; preds = %1, %0
; and SelectionDAGISel crashes. It should definitely not
; crash. Drop the dbg_value instead.
; CHECK-NOT: "matrix"
- tail call void @llvm.dbg.declare(metadata %class.Matrix3.0.6.10* %agg.result, metadata !45, metadata !DIExpression(DW_OP_deref))
+ tail call void @llvm.dbg.declare(metadata %class.Matrix3.0.6.10* %agg.result, metadata !45, metadata !DIExpression())
%2 = getelementptr inbounds %class.Matrix3.0.6.10, %class.Matrix3.0.6.10* %agg.result, i32 0, i32 0, i32 8
ret void
}
diff --git a/test/DebugInfo/Generic/block-asan.ll b/test/DebugInfo/Generic/block-asan.ll
index 96072b1ccfb5c..f1f8b35df27c9 100644
--- a/test/DebugInfo/Generic/block-asan.ll
+++ b/test/DebugInfo/Generic/block-asan.ll
@@ -13,7 +13,7 @@
; Check that the location of the ASAN instrumented __block variable is
; correct.
-; CHECK: !DIExpression(DW_OP_deref, DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
+; CHECK: !DIExpression(DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
index 7fd6296c7ee17..ca865ab598293 100644
--- a/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -1,9 +1,17 @@
-; RUN: llc -O0 -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel=true -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.6.7"
-;Radar 9321650
-
-;CHECK: ##DEBUG_VALUE: my_a
+; rdar://problem/9321650
+;
+; CHECK: DW_AT_name {{.*}}"j"
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000000)
+; CHECK-NEXT: DW_AT_name {{.*}}"my_a"
+; CHECK: .debug_loc contents:
+; CHECK: 0x00000000: Beginning address offset:
+; CHECK-NEXT: Ending address offset:
+; CHECK-NEXT: Location description: 77 08
+; rsp+8
%class.A = type { i32, i32, i32, i32 }
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
index 58560e4c81d48..12adf125fadbb 100644
--- a/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -70,7 +70,7 @@ entry:
; <label>:28 ; preds = %22, %entry
store i32 %0, i32* %3, align 4
- call void @llvm.dbg.declare(metadata %struct.A* %agg.result, metadata !24, metadata !DIExpression(DW_OP_deref)), !dbg !25
+ call void @llvm.dbg.declare(metadata %struct.A* %agg.result, metadata !24, metadata !DIExpression()), !dbg !25
call void @_ZN1AC1Ev(%struct.A* %agg.result), !dbg !25
store i64 1172321806, i64* %4, !dbg !26
%29 = inttoptr i64 %10 to i32*, !dbg !26
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index e3cfca19955eb..1085eaef0d4e4 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -107,5 +107,5 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
!106 = !DILocation(line: 40, scope: !42)
!107 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", directory: "")
!108 = !{i32 1, !"Debug Info Version", i32 3}
-!109 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
-!110 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
+!109 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
+!110 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
diff --git a/test/DebugInfo/X86/dw_op_minus.ll b/test/DebugInfo/X86/dw_op_minus.ll
index e76f2933fdda6..8e65b489c27b0 100644
--- a/test/DebugInfo/X86/dw_op_minus.ll
+++ b/test/DebugInfo/X86/dw_op_minus.ll
@@ -10,7 +10,7 @@
; Capture(buf);
; }
; }
-; The interesting part is !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+; The interesting part is !DIExpression(DW_OP_minus, 400)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -56,20 +56,17 @@ declare void @Capture(i32*)
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
!16 = !DILocation(line: 5, column: 3, scope: !4)
-!17 = !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+!17 = !DIExpression(DW_OP_minus, 400)
!18 = !DILocation(line: 5, column: 7, scope: !4)
!19 = !DILocation(line: 6, column: 11, scope: !4)
!20 = !DILocation(line: 6, column: 3, scope: !4)
!21 = !DILocation(line: 7, column: 1, scope: !4)
; RCX - 400
-; CHECK: .short 6 # Loc expr size
+; CHECK: .short 3 # Loc expr size
; CHECK-NEXT: .byte 114 # DW_OP_breg2
-; CHECK-NEXT: .byte 0 # 0
-; CHECK-NEXT: .byte 16 # DW_OP_constu
-; CHECK-NEXT: .byte 144 # 400
-; CHECK-NEXT: .byte 3 # DW_OP_minus
-; CHECK-NEXT: .byte 28
+; CHECK-NEXT: .byte 240 # -400
+; CHECK-NEXT: .byte 124
; RCX is clobbered in call @Capture, but there is a spilled copy.
; *(RSP + 8) - 400
diff --git a/test/DebugInfo/X86/dw_op_minus_direct.ll b/test/DebugInfo/X86/dw_op_minus_direct.ll
index 29e07213abbb2..8d346be532e87 100644
--- a/test/DebugInfo/X86/dw_op_minus_direct.ll
+++ b/test/DebugInfo/X86/dw_op_minus_direct.ll
@@ -1,15 +1,24 @@
; Test dwarf codegen of DW_OP_minus.
; RUN: llc -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+; RUN: llc -dwarf-version=2 -filetype=obj < %s | llvm-dwarfdump - \
+; RUN: | FileCheck %s --check-prefix=DWARF2
+; RUN: llc -dwarf-version=3 -filetype=obj < %s | llvm-dwarfdump - \
+; RUN: | FileCheck %s --check-prefix=DWARF2
; This was derived manually from:
; int inc(int i) {
; return i+1;
; }
+; DWARF2: .debug_info
+; DWARF2: DW_TAG_formal_parameter
+; DWARF2-NEXT: DW_AT_name {{.*}}"i"
+; DWARF2-NOT: DW_AT_location
+
; CHECK: Beginning address offset: 0x0000000000000000
; CHECK: Ending address offset: 0x0000000000000004
-; CHECK: Location description: 50 10 ff ff ff ff 0f 1a 10 01 1c
-; rax, constu 0xffffffff, and, constu 0x00000001, minus
+; CHECK: Location description: 70 00 10 ff ff ff ff 0f 1a 10 01 1c 9f
+; rax+0, constu 0xffffffff, and, constu 0x00000001, minus, stack-value
source_filename = "minus.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
@@ -42,7 +51,7 @@ attributes #1 = { nounwind readnone }
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !{!12}
!12 = !DILocalVariable(name: "i", arg: 1, scope: !7, file: !1, line: 1, type: !10)
-!13 = !DIExpression(DW_OP_minus, 1)
+!13 = !DIExpression(DW_OP_minus, 1, DW_OP_stack_value)
!14 = !DILocation(line: 1, column: 13, scope: !7)
!15 = !DILocation(line: 2, column: 11, scope: !7)
!16 = !DILocation(line: 2, column: 3, scope: !7)
diff --git a/test/DebugInfo/X86/fi-expr.ll b/test/DebugInfo/X86/fi-expr.ll
new file mode 100644
index 0000000000000..cf240505c0ccf
--- /dev/null
+++ b/test/DebugInfo/X86/fi-expr.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -o - %s -filetype=obj \
+; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; A hand-crafted FrameIndex location with a DW_OP_deref.
+; CHECK: DW_TAG_formal_parameter
+; fbreg -8, deref
+; CHECK-NEXT: DW_AT_location {{.*}} (<0x3> 91 78 06 )
+; CHECK-NEXT: DW_AT_name {{.*}} "foo"
+define void @f(i8* %bar) !dbg !6 {
+entry:
+ %foo.addr = alloca i8*
+ store i8* %bar, i8** %foo.addr
+ call void @llvm.dbg.declare(metadata i8** %foo.addr, metadata !12, metadata !13), !dbg !14
+ ret void, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!10 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !11)
+!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!12 = !DILocalVariable(name: "foo", arg: 1, scope: !6, file: !1, line: 1, type: !10)
+!13 = !DIExpression(DW_OP_deref)
+!14 = !DILocation(line: 1, scope: !6)
+!15 = !DILocation(line: 1, scope: !6)
diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll
index 84d6719348894..c4bb005a36681 100644
--- a/test/DebugInfo/X86/sret.ll
+++ b/test/DebugInfo/X86/sret.ll
@@ -1,10 +1,22 @@
; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
-; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s --check-prefix=CHECK-DWO
; Based on the debuginfo-tests/sret.cpp code.
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
+; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
+; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
+
+; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+; CHECK: _ZN1B9AInstanceEv
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000000)
+; CHECK-NEXT: DW_AT_name {{.*}}"a"
+; CHECK: .debug_loc contents:
+; CHECK: 0x00000000: Beginning address offset:
+; CHECK-NEXT: Ending address offset:
+; CHECK-NEXT: Location description: 75 00
+; rdi+0
%class.A = type { i32 (...)**, i32 }
%class.B = type { i8 }
@@ -98,7 +110,7 @@ entry:
call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !89, metadata !DIExpression()), !dbg !91
%this1 = load %class.B*, %class.B** %this.addr
store i1 false, i1* %nrvo, !dbg !92
- call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !93, metadata !DIExpression(DW_OP_deref)), !dbg !92
+ call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !93, metadata !DIExpression()), !dbg !92
call void @_ZN1AC1Ei(%class.A* %agg.result, i32 12), !dbg !92
store i1 true, i1* %nrvo, !dbg !94
store i32 1, i32* %cleanup.dest.slot
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index cc79cbbce9e9b..0366c0008d34d 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -24,9 +24,9 @@ entry:
; CHECK: entry:
; Verify that llvm.dbg.declare calls are in the entry basic block.
; CHECK-NOT: %entry
-; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ARG_ID:[0-9]+]], metadata ![[OPDEREF:[0-9]+]])
+; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ARG_ID:[0-9]+]], metadata ![[EMPTY:[0-9]+]])
; CHECK-NOT: %entry
-; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[VAR_ID:[0-9]+]], metadata ![[OPDEREF:[0-9]+]])
+; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[VAR_ID:[0-9]+]], metadata ![[EMPTY:[0-9]+]])
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
@@ -47,7 +47,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
; Verify that debug descriptors for argument and local variable will be replaced
; with descriptors that end with OpDeref (encoded as 2).
; CHECK: ![[ARG_ID]] = !DILocalVariable(name: "p", arg: 1,{{.*}} line: 1
-; CHECK: ![[OPDEREF]] = !DIExpression(DW_OP_deref)
+; CHECK: ![[EMPTY]] = !DIExpression()
; CHECK: ![[VAR_ID]] = !DILocalVariable(name: "r",{{.*}} line: 2
; Verify that there are no more variable descriptors.
; CHECK-NOT: !DILocalVariable(tag: DW_TAG_arg_variable
diff --git a/test/Instrumentation/SanitizerCoverage/coverage.ll b/test/Instrumentation/SanitizerCoverage/coverage.ll
index 75a341da021c9..d675c9d9c3709 100644
--- a/test/Instrumentation/SanitizerCoverage/coverage.ll
+++ b/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -5,9 +5,7 @@
; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=0 -S | FileCheck %s --check-prefix=CHECK_WITH_CHECK
; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=1 -S | FileCheck %s --check-prefix=CHECK_WITH_CHECK
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK3
-; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -S | FileCheck %s --check-prefix=CHECK4
; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK_TRACE_PC
-; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-8bit-counters=1 -S | FileCheck %s --check-prefix=CHECK-8BIT
; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=10 \
; RUN: -S | FileCheck %s --check-prefix=CHECK2
@@ -81,25 +79,6 @@ entry:
; CHECK3-NOT: call void @__sanitizer_cov
; CHECK3: ret void
-; test -sanitizer-coverage-8bit-counters=1
-; CHECK-8BIT-LABEL: define void @foo
-
-; CHECK-8BIT: [[V11:%[0-9]*]] = load i8{{.*}}!nosanitize
-; CHECK-8BIT: [[V12:%[0-9]*]] = add i8 [[V11]], 1
-; CHECK-8BIT: store i8 [[V12]]{{.*}}!nosanitize
-; CHECK-8BIT: [[V21:%[0-9]*]] = load i8{{.*}}!nosanitize
-; CHECK-8BIT: [[V22:%[0-9]*]] = add i8 [[V21]], 1
-; CHECK-8BIT: store i8 [[V22]]{{.*}}!nosanitize
-; CHECK-8BIT: [[V31:%[0-9]*]] = load i8{{.*}}!nosanitize
-; CHECK-8BIT: [[V32:%[0-9]*]] = add i8 [[V31]], 1
-; CHECK-8BIT: store i8 [[V32]]{{.*}}!nosanitize
-; CHECK-8BIT: [[V41:%[0-9]*]] = load i8{{.*}}!nosanitize
-; CHECK-8BIT: [[V42:%[0-9]*]] = add i8 [[V41]], 1
-; CHECK-8BIT: store i8 [[V42]]{{.*}}!nosanitize
-
-; CHECK-8BIT: ret void
-
-
%struct.StructWithVptr = type { i32 (...)** }
define void @CallViaVptr(%struct.StructWithVptr* %foo) uwtable sanitize_address {
@@ -113,13 +92,6 @@ entry:
ret void
}
-; We expect to see two calls to __sanitizer_cov_indir_call16
-; with different values of second argument.
-; CHECK4-LABEL: define void @CallViaVptr
-; CHECK4: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE:.*]])
-; CHECK4-NOT: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE]])
-; CHECK4: ret void
-
; CHECK_TRACE_PC-LABEL: define void @foo
; CHECK_TRACE_PC: call void @__sanitizer_cov_trace_pc
; CHECK_TRACE_PC: call void asm sideeffect "", ""()
@@ -135,10 +107,6 @@ entry:
unreachable
}
-; CHECK4-LABEL: define void @call_unreachable
-; CHECK4-NOT: __sanitizer_cov
-; CHECK4: unreachable
-
; CHECKPRUNE-LABEL: define void @foo
; CHECKPRUNE: call void @__sanitizer_cov
; CHECKPRUNE: call void @__sanitizer_cov
diff --git a/test/Instrumentation/SanitizerCoverage/tracing.ll b/test/Instrumentation/SanitizerCoverage/tracing.ll
index 9e153472eaba2..1561a14860144 100644
--- a/test/Instrumentation/SanitizerCoverage/tracing.ll
+++ b/test/Instrumentation/SanitizerCoverage/tracing.ll
@@ -1,6 +1,4 @@
; Test -sanitizer-coverage-experimental-tracing
-; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-experimental-tracing -S | FileCheck %s --check-prefix=CHECK1
-; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-experimental-tracing -S | FileCheck %s --check-prefix=CHECK3
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK_PC
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_PC_GUARD
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=CHECK_PC_GUARD_DARWIN
@@ -20,19 +18,6 @@ entry:
ret void
}
-; CHECK1-LABEL: define void @foo
-; CHECK1: call void @__sanitizer_cov_trace_func_enter
-; CHECK1: call void @__sanitizer_cov_trace_basic_block
-; CHECK1-NOT: call void @__sanitizer_cov_trace_basic_block
-; CHECK1: ret void
-
-; CHECK3-LABEL: define void @foo
-; CHECK3: call void @__sanitizer_cov_trace_func_enter
-; CHECK3: call void @__sanitizer_cov_trace_basic_block
-; CHECK3: call void @__sanitizer_cov_trace_basic_block
-; CHECK3-NOT: call void @__sanitizer_cov_trace_basic_block
-; CHECK3: ret void
-
; CHECK_PC-LABEL: define void @foo
; CHECK_PC: call void @__sanitizer_cov_trace_pc
; CHECK_PC: call void @__sanitizer_cov_trace_pc
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 80e32c48673a6..d37c0d5aba2a6 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -1781,12 +1781,20 @@
;; Exponent too large
fmov d3, #0.0625
fmov s2, #32.0
+ fmov s2, #32
+ fmov v0.4s, #-32
// CHECK-ERROR: error: expected compatible register or floating-point constant
// CHECK-ERROR-NEXT: fmov d3, #0.0625
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
// CHECK-ERROR-NEXT: fmov s2, #32.0
// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov s2, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov v0.4s, #-32
+// CHECK-ERROR-NEXT: ^
;; Fraction too precise
fmov s9, #1.03125
@@ -1798,11 +1806,17 @@
// CHECK-ERROR-NEXT: fmov s28, #1.96875
// CHECK-ERROR-NEXT: ^
- ;; No particular reason, but a striking omission
- fmov d0, #0.0
-// CHECK-ERROR-AARCH64: error: expected compatible register or floating-point constant
-// CHECK-ERROR-AARCH64-NEXT: fmov d0, #0.0
-// CHECK-ERROR-AARCH64-NEXT: ^
+ ;; Explicitly encoded value too large
+ fmov s15, #0x100
+// CHECK-ERROR: error: encoded floating point value out of range
+// CHECK-ERROR-NEXT: fmov s15, #0x100
+// CHECK-ERROR-NEXT: ^
+
+ ;; Not possible to fmov ZR to a whole vector
+ fmov v0.4s, #0.0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmov v0.4s, #0.0
+// CHECK-ERROR-NEXT: ^
//------------------------------------------------------------------------------
// Floating-point <-> integer conversion
diff --git a/test/MC/AMDGPU/gfx7_asm_all.s b/test/MC/AMDGPU/gfx7_asm_all.s
index d1d864c3ffeba..34c4f429ce24f 100644
--- a/test/MC/AMDGPU/gfx7_asm_all.s
+++ b/test/MC/AMDGPU/gfx7_asm_all.s
@@ -1,7 +1,5 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s
-// *** GENERATED BY TESTGEN, DO NOT EDIT! ***
-
ds_add_u32 v1, v2 offset:65535
// CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00]
@@ -458,24 +456,12 @@ ds_max_f32 v1, v2 offset:65535 gds
ds_gws_init v1 gds
// CHECK: [0x00,0x00,0x66,0xd8,0x00,0x01,0x00,0x00]
-ds_gws_sema_v gds
-// CHECK: [0x00,0x00,0x6a,0xd8,0x00,0x00,0x00,0x00]
-
ds_gws_sema_br v1 gds
// CHECK: [0x00,0x00,0x6e,0xd8,0x00,0x01,0x00,0x00]
-ds_gws_sema_p gds
-// CHECK: [0x00,0x00,0x72,0xd8,0x00,0x00,0x00,0x00]
-
ds_gws_barrier v1 gds
// CHECK: [0x00,0x00,0x76,0xd8,0x00,0x01,0x00,0x00]
-ds_gws_sema_release_all offset:65535 gds
-// CHECK: [0xff,0xff,0x62,0xd8,0x00,0x00,0x00,0x00]
-
-ds_gws_sema_release_all gds
-// CHECK: [0x00,0x00,0x62,0xd8,0x00,0x00,0x00,0x00]
-
ds_write_b8 v1, v2 offset:65535
// CHECK: [0xff,0xff,0x78,0xd8,0x01,0x02,0x00,0x00]
@@ -2666,23 +2652,89 @@ ds_max_src2_f64 v1 offset:4
ds_max_src2_f64 v1 offset:65535 gds
// CHECK: [0xff,0xff,0x4e,0xdb,0x01,0x00,0x00,0x00]
-ds_wrap_rtn_b32 v255, v1, v2, v3 offset:65535
-// CHECK: [0xff,0xff,0xd0,0xd8,0x01,0x02,0x03,0xff]
+ds_write_b96 v1, v[2:4] offset:65535
+// CHECK: [0xff,0xff,0x78,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b96 v255, v[2:4] offset:65535
+// CHECK: [0xff,0xff,0x78,0xdb,0xff,0x02,0x00,0x00]
+
+ds_write_b96 v1, v[253:255] offset:65535
+// CHECK: [0xff,0xff,0x78,0xdb,0x01,0xfd,0x00,0x00]
+
+ds_write_b96 v1, v[2:4]
+// CHECK: [0x00,0x00,0x78,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b96 v1, v[2:4] offset:0
+// CHECK: [0x00,0x00,0x78,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b96 v1, v[2:4] offset:4
+// CHECK: [0x04,0x00,0x78,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b96 v1, v[2:4] offset:65535 gds
+// CHECK: [0xff,0xff,0x7a,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b128 v1, v[2:5] offset:65535
+// CHECK: [0xff,0xff,0x7c,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b128 v255, v[2:5] offset:65535
+// CHECK: [0xff,0xff,0x7c,0xdb,0xff,0x02,0x00,0x00]
+
+ds_write_b128 v1, v[252:255] offset:65535
+// CHECK: [0xff,0xff,0x7c,0xdb,0x01,0xfc,0x00,0x00]
+
+ds_write_b128 v1, v[2:5]
+// CHECK: [0x00,0x00,0x7c,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b128 v1, v[2:5] offset:0
+// CHECK: [0x00,0x00,0x7c,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b128 v1, v[2:5] offset:4
+// CHECK: [0x04,0x00,0x7c,0xdb,0x01,0x02,0x00,0x00]
+
+ds_write_b128 v1, v[2:5] offset:65535 gds
+// CHECK: [0xff,0xff,0x7e,0xdb,0x01,0x02,0x00,0x00]
-ds_wrap_rtn_b32 v255, v1, v2, v3 offset:65535 gds
-// CHECK: [0xff,0xff,0xd2,0xd8,0x01,0x02,0x03,0xff]
+ds_read_b96 v[5:7], v1 offset:65535
+// CHECK: [0xff,0xff,0xf8,0xdb,0x01,0x00,0x00,0x05]
-ds_wrap_rtn_b32 v255, v1, v2, v3
-// CHECK: [0x00,0x00,0xd0,0xd8,0x01,0x02,0x03,0xff]
+ds_read_b96 v[253:255], v1 offset:65535
+// CHECK: [0xff,0xff,0xf8,0xdb,0x01,0x00,0x00,0xfd]
-ds_condxchg32_rtn_b64 v[5:6], v1, v[2:3]
-// CHECK: [0x00,0x00,0xf8,0xd9,0x01,0x02,0x00,0x05]
+ds_read_b96 v[5:7], v255 offset:65535
+// CHECK: [0xff,0xff,0xf8,0xdb,0xff,0x00,0x00,0x05]
-ds_condxchg32_rtn_b64 v[5:6], v1, v[2:3] gds
-// CHECK: [0x00,0x00,0xfa,0xd9,0x01,0x02,0x00,0x05]
+ds_read_b96 v[5:7], v1
+// CHECK: [0x00,0x00,0xf8,0xdb,0x01,0x00,0x00,0x05]
-ds_condxchg32_rtn_b64 v[5:6], v1, v[254:255] offset:65535
-// CHECK: [0xff,0xff,0xf8,0xd9,0x01,0xfe,0x00,0x05]
+ds_read_b96 v[5:7], v1 offset:0
+// CHECK: [0x00,0x00,0xf8,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b96 v[5:7], v1 offset:4
+// CHECK: [0x04,0x00,0xf8,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b96 v[5:7], v1 offset:65535 gds
+// CHECK: [0xff,0xff,0xfa,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b128 v[5:8], v1 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b128 v[252:255], v1 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0xfc]
+
+ds_read_b128 v[5:8], v255 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xdb,0xff,0x00,0x00,0x05]
+
+ds_read_b128 v[5:8], v1
+// CHECK: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b128 v[5:8], v1 offset:0
+// CHECK: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b128 v[5:8], v1 offset:4
+// CHECK: [0x04,0x00,0xfc,0xdb,0x01,0x00,0x00,0x05]
+
+ds_read_b128 v[5:8], v1 offset:65535 gds
+// CHECK: [0xff,0xff,0xfe,0xdb,0x01,0x00,0x00,0x05]
exp mrt0, v0, v0, v0, v0
// CHECK: [0x0f,0x00,0x00,0xf8,0x00,0x00,0x00,0x00]
@@ -23165,8 +23217,17 @@ v_cvt_i32_f64_e64 v5, ttmp[10:11]
v_cvt_i32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x06,0xd3,0x7e,0x00,0x00,0x00]
-v_cvt_i32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x06,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_i32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x06,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x06,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x06,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x06,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_i32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x06,0xd3,0x01,0x01,0x00,0x00]
@@ -23690,8 +23751,17 @@ v_cvt_u32_f32_e64 v5, exec_lo
v_cvt_u32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x0e,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_u32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x0e,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_u32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x0e,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x0e,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x0e,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x0e,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_u32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x0e,0xd3,0x01,0x01,0x00,0x00]
@@ -23819,8 +23889,17 @@ v_cvt_i32_f32_e64 v5, exec_lo
v_cvt_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x10,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x10,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x10,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x10,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x10,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x10,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x10,0xd3,0x01,0x01,0x00,0x00]
@@ -24080,8 +24159,17 @@ v_cvt_f16_f32_e64 v5, exec_lo
v_cvt_f16_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x14,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x14,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_f16_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x14,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x14,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x14,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x14,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_f16_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x14,0xd3,0x01,0x01,0x00,0x00]
@@ -24197,9 +24285,6 @@ v_cvt_f32_f16_e64 v5, exec_lo
v_cvt_f32_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x16,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_f32_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x16,0xd3,0xfd,0x00,0x00,0x00]
-
v_cvt_f32_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x16,0xd3,0x01,0x01,0x00,0x00]
@@ -24332,8 +24417,17 @@ v_cvt_rpi_i32_f32_e64 v5, exec_lo
v_cvt_rpi_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x18,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_rpi_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x18,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_rpi_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x18,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x18,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x18,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x18,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_rpi_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x18,0xd3,0x01,0x01,0x00,0x00]
@@ -24461,8 +24555,17 @@ v_cvt_flr_i32_f32_e64 v5, exec_lo
v_cvt_flr_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x1a,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_flr_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x1a,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_flr_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x1a,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x1a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x1a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x1a,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_flr_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x1a,0xd3,0x01,0x01,0x00,0x00]
@@ -24692,8 +24795,17 @@ v_cvt_f32_f64_e64 v5, ttmp[10:11]
v_cvt_f32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x1e,0xd3,0x7e,0x00,0x00,0x00]
-v_cvt_f32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x1e,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_f32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x1e,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x1e,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x1e,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x1e,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_f32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x1e,0xd3,0x01,0x01,0x00,0x00]
@@ -24833,8 +24945,17 @@ v_cvt_f64_f32_e64 v[5:6], exec_lo
v_cvt_f64_f32_e64 v[5:6], exec_hi
// CHECK: [0x05,0x00,0x20,0xd3,0x7f,0x00,0x00,0x00]
-v_cvt_f64_f32_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x20,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_f64_f32_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x20,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x20,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x20,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x20,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_f64_f32_e64 v[5:6], v1
// CHECK: [0x05,0x00,0x20,0xd3,0x01,0x01,0x00,0x00]
@@ -25472,8 +25593,17 @@ v_cvt_u32_f64_e64 v5, ttmp[10:11]
v_cvt_u32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x2a,0xd3,0x7e,0x00,0x00,0x00]
-v_cvt_u32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x2a,0xd3,0xfd,0x00,0x00,0x00]
+v_cvt_u32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x2a,0xd3,0x80,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x2a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x2a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x2a,0xd3,0xf7,0x00,0x00,0x00]
v_cvt_u32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x2a,0xd3,0x01,0x01,0x00,0x00]
@@ -25703,8 +25833,17 @@ v_trunc_f64_e64 v[5:6], ttmp[10:11]
v_trunc_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x2e,0xd3,0x7e,0x00,0x00,0x00]
-v_trunc_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x2e,0xd3,0xfd,0x00,0x00,0x00]
+v_trunc_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x2e,0xd3,0x80,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x2e,0xd3,0xc1,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x2e,0xd3,0xf0,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x2e,0xd3,0xf7,0x00,0x00,0x00]
v_trunc_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x2e,0xd3,0x01,0x01,0x00,0x00]
@@ -25814,8 +25953,17 @@ v_ceil_f64_e64 v[5:6], ttmp[10:11]
v_ceil_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x30,0xd3,0x7e,0x00,0x00,0x00]
-v_ceil_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x30,0xd3,0xfd,0x00,0x00,0x00]
+v_ceil_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x30,0xd3,0x80,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x30,0xd3,0xc1,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x30,0xd3,0xf0,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x30,0xd3,0xf7,0x00,0x00,0x00]
v_ceil_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x30,0xd3,0x01,0x01,0x00,0x00]
@@ -25925,8 +26073,17 @@ v_rndne_f64_e64 v[5:6], ttmp[10:11]
v_rndne_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x32,0xd3,0x7e,0x00,0x00,0x00]
-v_rndne_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x32,0xd3,0xfd,0x00,0x00,0x00]
+v_rndne_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x32,0xd3,0x80,0x00,0x00,0x00]
+
+v_rndne_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x32,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rndne_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x32,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rndne_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x32,0xd3,0xf7,0x00,0x00,0x00]
v_rndne_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x32,0xd3,0x01,0x01,0x00,0x00]
@@ -26036,8 +26193,17 @@ v_floor_f64_e64 v[5:6], ttmp[10:11]
v_floor_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x34,0xd3,0x7e,0x00,0x00,0x00]
-v_floor_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x34,0xd3,0xfd,0x00,0x00,0x00]
+v_floor_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x34,0xd3,0x80,0x00,0x00,0x00]
+
+v_floor_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x34,0xd3,0xc1,0x00,0x00,0x00]
+
+v_floor_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x34,0xd3,0xf0,0x00,0x00,0x00]
+
+v_floor_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x34,0xd3,0xf7,0x00,0x00,0x00]
v_floor_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x34,0xd3,0x01,0x01,0x00,0x00]
@@ -26177,8 +26343,17 @@ v_fract_f32_e64 v5, exec_lo
v_fract_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x40,0xd3,0x7f,0x00,0x00,0x00]
-v_fract_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x40,0xd3,0xfd,0x00,0x00,0x00]
+v_fract_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x40,0xd3,0x80,0x00,0x00,0x00]
+
+v_fract_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x40,0xd3,0xc1,0x00,0x00,0x00]
+
+v_fract_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x40,0xd3,0xf0,0x00,0x00,0x00]
+
+v_fract_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x40,0xd3,0xf7,0x00,0x00,0x00]
v_fract_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x40,0xd3,0x01,0x01,0x00,0x00]
@@ -26318,8 +26493,17 @@ v_trunc_f32_e64 v5, exec_lo
v_trunc_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x42,0xd3,0x7f,0x00,0x00,0x00]
-v_trunc_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x42,0xd3,0xfd,0x00,0x00,0x00]
+v_trunc_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x42,0xd3,0x80,0x00,0x00,0x00]
+
+v_trunc_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x42,0xd3,0xc1,0x00,0x00,0x00]
+
+v_trunc_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x42,0xd3,0xf0,0x00,0x00,0x00]
+
+v_trunc_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x42,0xd3,0xf7,0x00,0x00,0x00]
v_trunc_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x42,0xd3,0x01,0x01,0x00,0x00]
@@ -26459,8 +26643,17 @@ v_ceil_f32_e64 v5, exec_lo
v_ceil_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x44,0xd3,0x7f,0x00,0x00,0x00]
-v_ceil_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x44,0xd3,0xfd,0x00,0x00,0x00]
+v_ceil_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x44,0xd3,0x80,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x44,0xd3,0xc1,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x44,0xd3,0xf0,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x44,0xd3,0xf7,0x00,0x00,0x00]
v_ceil_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x44,0xd3,0x01,0x01,0x00,0x00]
@@ -26600,8 +26793,17 @@ v_rndne_f32_e64 v5, exec_lo
v_rndne_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x46,0xd3,0x7f,0x00,0x00,0x00]
-v_rndne_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x46,0xd3,0xfd,0x00,0x00,0x00]
+v_rndne_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x46,0xd3,0x80,0x00,0x00,0x00]
+
+v_rndne_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x46,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rndne_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x46,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rndne_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x46,0xd3,0xf7,0x00,0x00,0x00]
v_rndne_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x46,0xd3,0x01,0x01,0x00,0x00]
@@ -26741,8 +26943,17 @@ v_floor_f32_e64 v5, exec_lo
v_floor_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x48,0xd3,0x7f,0x00,0x00,0x00]
-v_floor_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x48,0xd3,0xfd,0x00,0x00,0x00]
+v_floor_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x48,0xd3,0x80,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x48,0xd3,0xc1,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x48,0xd3,0xf0,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x48,0xd3,0xf7,0x00,0x00,0x00]
v_floor_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x48,0xd3,0x01,0x01,0x00,0x00]
@@ -26885,11 +27096,14 @@ v_exp_f32_e64 v5, exec_hi
v_exp_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x4a,0xd3,0x80,0x00,0x00,0x00]
+v_exp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4a,0xd3,0xc1,0x00,0x00,0x00]
+
v_exp_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x4a,0xd3,0xf0,0x00,0x00,0x00]
-v_exp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4a,0xd3,0xfd,0x00,0x00,0x00]
+v_exp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4a,0xd3,0xf7,0x00,0x00,0x00]
v_exp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4a,0xd3,0x01,0x01,0x00,0x00]
@@ -27026,11 +27240,14 @@ v_log_clamp_f32_e64 v5, exec_hi
v_log_clamp_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x4c,0xd3,0x80,0x00,0x00,0x00]
+v_log_clamp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4c,0xd3,0xc1,0x00,0x00,0x00]
+
v_log_clamp_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x4c,0xd3,0xf0,0x00,0x00,0x00]
-v_log_clamp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4c,0xd3,0xfd,0x00,0x00,0x00]
+v_log_clamp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4c,0xd3,0xf7,0x00,0x00,0x00]
v_log_clamp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4c,0xd3,0x01,0x01,0x00,0x00]
@@ -27167,11 +27384,14 @@ v_log_f32_e64 v5, exec_hi
v_log_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x4e,0xd3,0x80,0x00,0x00,0x00]
+v_log_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4e,0xd3,0xc1,0x00,0x00,0x00]
+
v_log_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x4e,0xd3,0xf0,0x00,0x00,0x00]
-v_log_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4e,0xd3,0xfd,0x00,0x00,0x00]
+v_log_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4e,0xd3,0xf7,0x00,0x00,0x00]
v_log_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4e,0xd3,0x01,0x01,0x00,0x00]
@@ -27308,11 +27528,14 @@ v_rcp_clamp_f32_e64 v5, exec_hi
v_rcp_clamp_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x50,0xd3,0x80,0x00,0x00,0x00]
+v_rcp_clamp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x50,0xd3,0xc1,0x00,0x00,0x00]
+
v_rcp_clamp_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x50,0xd3,0xf0,0x00,0x00,0x00]
-v_rcp_clamp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x50,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_clamp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x50,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_clamp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x50,0xd3,0x01,0x01,0x00,0x00]
@@ -27449,11 +27672,14 @@ v_rcp_legacy_f32_e64 v5, exec_hi
v_rcp_legacy_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x52,0xd3,0x80,0x00,0x00,0x00]
+v_rcp_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x52,0xd3,0xc1,0x00,0x00,0x00]
+
v_rcp_legacy_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x52,0xd3,0xf0,0x00,0x00,0x00]
-v_rcp_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x52,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x52,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x52,0xd3,0x01,0x01,0x00,0x00]
@@ -27590,11 +27816,14 @@ v_rcp_f32_e64 v5, exec_hi
v_rcp_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x54,0xd3,0x80,0x00,0x00,0x00]
+v_rcp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x54,0xd3,0xc1,0x00,0x00,0x00]
+
v_rcp_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x54,0xd3,0xf0,0x00,0x00,0x00]
-v_rcp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x54,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x54,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x54,0xd3,0x01,0x01,0x00,0x00]
@@ -27728,8 +27957,17 @@ v_rcp_iflag_f32_e64 v5, exec_lo
v_rcp_iflag_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x56,0xd3,0x7f,0x00,0x00,0x00]
-v_rcp_iflag_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x56,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_iflag_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x56,0xd3,0x80,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x56,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x56,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x56,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_iflag_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x56,0xd3,0x01,0x01,0x00,0x00]
@@ -27869,8 +28107,17 @@ v_rsq_clamp_f32_e64 v5, exec_lo
v_rsq_clamp_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x58,0xd3,0x7f,0x00,0x00,0x00]
-v_rsq_clamp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x58,0xd3,0xfd,0x00,0x00,0x00]
+v_rsq_clamp_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x58,0xd3,0x80,0x00,0x00,0x00]
+
+v_rsq_clamp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x58,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rsq_clamp_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x58,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rsq_clamp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x58,0xd3,0xf7,0x00,0x00,0x00]
v_rsq_clamp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x58,0xd3,0x01,0x01,0x00,0x00]
@@ -28010,8 +28257,17 @@ v_rsq_legacy_f32_e64 v5, exec_lo
v_rsq_legacy_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x5a,0xd3,0x7f,0x00,0x00,0x00]
-v_rsq_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5a,0xd3,0xfd,0x00,0x00,0x00]
+v_rsq_legacy_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x5a,0xd3,0x80,0x00,0x00,0x00]
+
+v_rsq_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rsq_legacy_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x5a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rsq_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5a,0xd3,0xf7,0x00,0x00,0x00]
v_rsq_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5a,0xd3,0x01,0x01,0x00,0x00]
@@ -28151,8 +28407,17 @@ v_rsq_f32_e64 v5, exec_lo
v_rsq_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x5c,0xd3,0x7f,0x00,0x00,0x00]
-v_rsq_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5c,0xd3,0xfd,0x00,0x00,0x00]
+v_rsq_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x5c,0xd3,0x80,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5c,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x5c,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5c,0xd3,0xf7,0x00,0x00,0x00]
v_rsq_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5c,0xd3,0x01,0x01,0x00,0x00]
@@ -28262,8 +28527,17 @@ v_rcp_f64_e64 v[5:6], ttmp[10:11]
v_rcp_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x5e,0xd3,0x7e,0x00,0x00,0x00]
-v_rcp_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x5e,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x5e,0xd3,0x80,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x5e,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x5e,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x5e,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x5e,0xd3,0x01,0x01,0x00,0x00]
@@ -28373,8 +28647,17 @@ v_rcp_clamp_f64_e64 v[5:6], ttmp[10:11]
v_rcp_clamp_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x60,0xd3,0x7e,0x00,0x00,0x00]
-v_rcp_clamp_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x60,0xd3,0xfd,0x00,0x00,0x00]
+v_rcp_clamp_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x60,0xd3,0x80,0x00,0x00,0x00]
+
+v_rcp_clamp_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x60,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rcp_clamp_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x60,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rcp_clamp_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x60,0xd3,0xf7,0x00,0x00,0x00]
v_rcp_clamp_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x60,0xd3,0x01,0x01,0x00,0x00]
@@ -28484,8 +28767,17 @@ v_rsq_f64_e64 v[5:6], ttmp[10:11]
v_rsq_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x62,0xd3,0x7e,0x00,0x00,0x00]
-v_rsq_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x62,0xd3,0xfd,0x00,0x00,0x00]
+v_rsq_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x62,0xd3,0x80,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x62,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x62,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x62,0xd3,0xf7,0x00,0x00,0x00]
v_rsq_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x62,0xd3,0x01,0x01,0x00,0x00]
@@ -28595,8 +28887,17 @@ v_rsq_clamp_f64_e64 v[5:6], ttmp[10:11]
v_rsq_clamp_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x64,0xd3,0x7e,0x00,0x00,0x00]
-v_rsq_clamp_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x64,0xd3,0xfd,0x00,0x00,0x00]
+v_rsq_clamp_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x64,0xd3,0x80,0x00,0x00,0x00]
+
+v_rsq_clamp_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x64,0xd3,0xc1,0x00,0x00,0x00]
+
+v_rsq_clamp_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x64,0xd3,0xf0,0x00,0x00,0x00]
+
+v_rsq_clamp_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x64,0xd3,0xf7,0x00,0x00,0x00]
v_rsq_clamp_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x64,0xd3,0x01,0x01,0x00,0x00]
@@ -28736,8 +29037,17 @@ v_sqrt_f32_e64 v5, exec_lo
v_sqrt_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x66,0xd3,0x7f,0x00,0x00,0x00]
-v_sqrt_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x66,0xd3,0xfd,0x00,0x00,0x00]
+v_sqrt_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x66,0xd3,0x80,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x66,0xd3,0xc1,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x66,0xd3,0xf0,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x66,0xd3,0xf7,0x00,0x00,0x00]
v_sqrt_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x66,0xd3,0x01,0x01,0x00,0x00]
@@ -28847,8 +29157,17 @@ v_sqrt_f64_e64 v[5:6], ttmp[10:11]
v_sqrt_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x68,0xd3,0x7e,0x00,0x00,0x00]
-v_sqrt_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x68,0xd3,0xfd,0x00,0x00,0x00]
+v_sqrt_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x68,0xd3,0x80,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x68,0xd3,0xc1,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x68,0xd3,0xf0,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x68,0xd3,0xf7,0x00,0x00,0x00]
v_sqrt_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x68,0xd3,0x01,0x01,0x00,0x00]
@@ -28988,8 +29307,17 @@ v_sin_f32_e64 v5, exec_lo
v_sin_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x6a,0xd3,0x7f,0x00,0x00,0x00]
-v_sin_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x6a,0xd3,0xfd,0x00,0x00,0x00]
+v_sin_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x6a,0xd3,0x80,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x6a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x6a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x6a,0xd3,0xf7,0x00,0x00,0x00]
v_sin_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x6a,0xd3,0x01,0x01,0x00,0x00]
@@ -29129,8 +29457,17 @@ v_cos_f32_e64 v5, exec_lo
v_cos_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x6c,0xd3,0x7f,0x00,0x00,0x00]
-v_cos_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x6c,0xd3,0xfd,0x00,0x00,0x00]
+v_cos_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x6c,0xd3,0x80,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x6c,0xd3,0xc1,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x6c,0xd3,0xf0,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x6c,0xd3,0xf7,0x00,0x00,0x00]
v_cos_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x6c,0xd3,0x01,0x01,0x00,0x00]
@@ -29900,8 +30237,17 @@ v_frexp_exp_i32_f64_e64 v5, ttmp[10:11]
v_frexp_exp_i32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x78,0xd3,0x7e,0x00,0x00,0x00]
-v_frexp_exp_i32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x78,0xd3,0xfd,0x00,0x00,0x00]
+v_frexp_exp_i32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x78,0xd3,0x80,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x78,0xd3,0xc1,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x78,0xd3,0xf0,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x78,0xd3,0xf7,0x00,0x00,0x00]
v_frexp_exp_i32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x78,0xd3,0x01,0x01,0x00,0x00]
@@ -29999,8 +30345,17 @@ v_frexp_mant_f64_e64 v[5:6], ttmp[10:11]
v_frexp_mant_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x7a,0xd3,0x7e,0x00,0x00,0x00]
-v_frexp_mant_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x7a,0xd3,0xfd,0x00,0x00,0x00]
+v_frexp_mant_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x7a,0xd3,0x80,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x7a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x7a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x7a,0xd3,0xf7,0x00,0x00,0x00]
v_frexp_mant_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x7a,0xd3,0x01,0x01,0x00,0x00]
@@ -30110,8 +30465,17 @@ v_fract_f64_e64 v[5:6], ttmp[10:11]
v_fract_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x7c,0xd3,0x7e,0x00,0x00,0x00]
-v_fract_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x7c,0xd3,0xfd,0x00,0x00,0x00]
+v_fract_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x7c,0xd3,0xc1,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x7c,0xd3,0xf0,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x7c,0xd3,0xf7,0x00,0x00,0x00]
v_fract_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x7c,0xd3,0x01,0x01,0x00,0x00]
@@ -30254,11 +30618,14 @@ v_frexp_exp_i32_f32_e64 v5, exec_hi
v_frexp_exp_i32_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x7e,0xd3,0x80,0x00,0x00,0x00]
+v_frexp_exp_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x7e,0xd3,0xc1,0x00,0x00,0x00]
+
v_frexp_exp_i32_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x7e,0xd3,0xf0,0x00,0x00,0x00]
-v_frexp_exp_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x7e,0xd3,0xfd,0x00,0x00,0x00]
+v_frexp_exp_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7e,0xd3,0xf7,0x00,0x00,0x00]
v_frexp_exp_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x7e,0xd3,0x01,0x01,0x00,0x00]
@@ -30383,11 +30750,14 @@ v_frexp_mant_f32_e64 v5, exec_hi
v_frexp_mant_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x80,0xd3,0x80,0x00,0x00,0x00]
+v_frexp_mant_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x80,0xd3,0xc1,0x00,0x00,0x00]
+
v_frexp_mant_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x80,0xd3,0xf0,0x00,0x00,0x00]
-v_frexp_mant_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x80,0xd3,0xfd,0x00,0x00,0x00]
+v_frexp_mant_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x80,0xd3,0xf7,0x00,0x00,0x00]
v_frexp_mant_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x80,0xd3,0x01,0x01,0x00,0x00]
@@ -30599,8 +30969,17 @@ v_log_legacy_f32_e64 v5, exec_lo
v_log_legacy_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x8a,0xd3,0x7f,0x00,0x00,0x00]
-v_log_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x8a,0xd3,0xfd,0x00,0x00,0x00]
+v_log_legacy_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x8a,0xd3,0x80,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x8a,0xd3,0xc1,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x8a,0xd3,0xf0,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x8a,0xd3,0xf7,0x00,0x00,0x00]
v_log_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x01,0x00,0x00]
@@ -30740,8 +31119,17 @@ v_exp_legacy_f32_e64 v5, exec_lo
v_exp_legacy_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x8c,0xd3,0x7f,0x00,0x00,0x00]
-v_exp_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x8c,0xd3,0xfd,0x00,0x00,0x00]
+v_exp_legacy_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x8c,0xd3,0x80,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x8c,0xd3,0xc1,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x8c,0xd3,0xf0,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x8c,0xd3,0xf7,0x00,0x00,0x00]
v_exp_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x01,0x00,0x00]
@@ -31025,92 +31413,113 @@ v_add_f32 v5, v255, v2
v_add_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x06]
+v_add_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x00]
+
+v_add_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x06,0xd2,0x80,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x06,0xd2,0xc1,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x06,0xd2,0xf0,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x06,0xd2,0xf7,0x04,0x00,0x00]
+
v_add_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x00]
-v_add_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x06,0xd2,0x01,0x05,0x00,0x00]
-
v_add_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x06,0xd2,0xff,0x05,0x00,0x00]
-v_add_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xcf,0x00,0x00]
+v_add_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xce,0x00,0x00]
+
+v_add_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xd0,0x00,0x00]
+
+v_add_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xd2,0x00,0x00]
-v_add_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xd1,0x00,0x00]
+v_add_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xd4,0x00,0x00]
-v_add_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xd3,0x00,0x00]
+v_add_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xd6,0x00,0x00]
-v_add_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xd5,0x00,0x00]
+v_add_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xd8,0x00,0x00]
-v_add_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xd7,0x00,0x00]
+v_add_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xda,0x00,0x00]
-v_add_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xd9,0x00,0x00]
+v_add_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xdc,0x00,0x00]
-v_add_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xdb,0x00,0x00]
+v_add_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xde,0x00,0x00]
-v_add_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xdd,0x00,0x00]
+v_add_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xf6,0x00,0x00]
-v_add_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xdf,0x00,0x00]
+v_add_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xf8,0x00,0x00]
-v_add_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xf7,0x00,0x00]
+v_add_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xfc,0x00,0x00]
-v_add_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xf9,0x00,0x00]
+v_add_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xfe,0x00,0x00]
-v_add_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xfd,0x00,0x00]
+v_add_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x00,0x01,0x00]
-v_add_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xff,0x00,0x00]
+v_add_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x82,0x01,0x00]
-v_add_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xfb,0x01,0x00]
+v_add_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xe0,0x01,0x00]
-v_add_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x02,0x00]
+v_add_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xee,0x01,0x00]
-v_add_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0xff,0x03,0x00]
+v_add_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x02,0x00]
-v_add_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x20]
+v_add_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0xfe,0x03,0x00]
-v_add_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x40]
+v_add_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x20]
-v_add_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x60]
+v_add_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x40]
-v_add_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x06,0xd2,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x60]
-v_add_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x06,0xd2,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x06,0xd2,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x06,0xd2,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x06,0xd2,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x06,0xd2,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x06,0xd2,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x08]
+v_add_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x06,0xd2,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x10]
+v_add_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x08]
-v_add_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x06,0xd2,0x01,0x05,0x00,0x18]
+v_add_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x10]
+
+v_add_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x06,0xd2,0x80,0x04,0x00,0x18]
v_sub_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x08]
@@ -31184,92 +31593,113 @@ v_sub_f32 v5, v255, v2
v_sub_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x08]
+v_sub_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x00]
+
+v_sub_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x08,0xd2,0x80,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x08,0xd2,0xc1,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x08,0xd2,0xf0,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x08,0xd2,0xf7,0x04,0x00,0x00]
+
v_sub_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x00]
-v_sub_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x08,0xd2,0x01,0x05,0x00,0x00]
-
v_sub_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x08,0xd2,0xff,0x05,0x00,0x00]
-v_sub_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xcf,0x00,0x00]
+v_sub_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xce,0x00,0x00]
+
+v_sub_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xd0,0x00,0x00]
+
+v_sub_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xd2,0x00,0x00]
-v_sub_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xd1,0x00,0x00]
+v_sub_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xd4,0x00,0x00]
-v_sub_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xd3,0x00,0x00]
+v_sub_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xd6,0x00,0x00]
-v_sub_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xd5,0x00,0x00]
+v_sub_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xd8,0x00,0x00]
-v_sub_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xd7,0x00,0x00]
+v_sub_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xda,0x00,0x00]
-v_sub_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xd9,0x00,0x00]
+v_sub_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xdc,0x00,0x00]
-v_sub_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xdb,0x00,0x00]
+v_sub_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xde,0x00,0x00]
-v_sub_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xdd,0x00,0x00]
+v_sub_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xf6,0x00,0x00]
-v_sub_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xdf,0x00,0x00]
+v_sub_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xf8,0x00,0x00]
-v_sub_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xf7,0x00,0x00]
+v_sub_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xfc,0x00,0x00]
-v_sub_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xf9,0x00,0x00]
+v_sub_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xfe,0x00,0x00]
-v_sub_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xfd,0x00,0x00]
+v_sub_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x00,0x01,0x00]
-v_sub_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xff,0x00,0x00]
+v_sub_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x82,0x01,0x00]
-v_sub_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xfb,0x01,0x00]
+v_sub_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xe0,0x01,0x00]
-v_sub_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x02,0x00]
+v_sub_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xee,0x01,0x00]
-v_sub_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0xff,0x03,0x00]
+v_sub_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x02,0x00]
-v_sub_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x20]
+v_sub_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0xfe,0x03,0x00]
-v_sub_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x40]
+v_sub_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x20]
-v_sub_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x60]
+v_sub_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x40]
-v_sub_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x08,0xd2,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x60]
-v_sub_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x08,0xd2,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x08,0xd2,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x08,0xd2,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x08,0xd2,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x08,0xd2,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x08,0xd2,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x08]
+v_sub_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x08,0xd2,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x10]
+v_sub_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x08]
-v_sub_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x08,0xd2,0x01,0x05,0x00,0x18]
+v_sub_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x10]
+
+v_sub_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x08,0xd2,0x80,0x04,0x00,0x18]
v_subrev_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x0a]
@@ -31343,92 +31773,113 @@ v_subrev_f32 v5, v255, v2
v_subrev_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x0a]
+v_subrev_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x0a,0xd2,0x80,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0xc1,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0xf0,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0xf7,0x04,0x00,0x00]
+
v_subrev_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x00]
-v_subrev_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x0a,0xd2,0x01,0x05,0x00,0x00]
-
v_subrev_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x0a,0xd2,0xff,0x05,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xcf,0x00,0x00]
+v_subrev_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xce,0x00,0x00]
+
+v_subrev_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xd0,0x00,0x00]
+
+v_subrev_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xd2,0x00,0x00]
-v_subrev_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xd1,0x00,0x00]
+v_subrev_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xd4,0x00,0x00]
-v_subrev_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xd3,0x00,0x00]
+v_subrev_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xd6,0x00,0x00]
-v_subrev_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xd5,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xd8,0x00,0x00]
-v_subrev_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xd7,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xda,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xd9,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xdc,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xdb,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xde,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xdd,0x00,0x00]
+v_subrev_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xf6,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xdf,0x00,0x00]
+v_subrev_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xf8,0x00,0x00]
-v_subrev_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xf7,0x00,0x00]
+v_subrev_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xfc,0x00,0x00]
-v_subrev_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xf9,0x00,0x00]
+v_subrev_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xfe,0x00,0x00]
-v_subrev_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xfd,0x00,0x00]
+v_subrev_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x00,0x01,0x00]
-v_subrev_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xff,0x00,0x00]
+v_subrev_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x82,0x01,0x00]
-v_subrev_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xfb,0x01,0x00]
+v_subrev_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xe0,0x01,0x00]
-v_subrev_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x02,0x00]
+v_subrev_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xee,0x01,0x00]
-v_subrev_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0xff,0x03,0x00]
+v_subrev_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x02,0x00]
-v_subrev_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x20]
+v_subrev_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0xfe,0x03,0x00]
-v_subrev_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x40]
+v_subrev_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x20]
-v_subrev_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x60]
+v_subrev_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x40]
-v_subrev_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x0a,0xd2,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x60]
-v_subrev_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x0a,0xd2,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x0a,0xd2,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x0a,0xd2,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x0a,0xd2,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x0a,0xd2,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x0a,0xd2,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x08]
+v_subrev_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x0a,0xd2,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x10]
+v_subrev_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x08]
-v_subrev_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x0a,0xd2,0x01,0x05,0x00,0x18]
+v_subrev_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x10]
+
+v_subrev_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x0a,0xd2,0x80,0x04,0x00,0x18]
v_mac_legacy_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x0c]
@@ -31502,92 +31953,113 @@ v_mac_legacy_f32 v5, v255, v2
v_mac_legacy_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x0c]
+v_mac_legacy_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x00]
+
+v_mac_legacy_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x0c,0xd2,0x80,0x04,0x00,0x00]
+
+v_mac_legacy_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0xc1,0x04,0x00,0x00]
+
+v_mac_legacy_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0xf0,0x04,0x00,0x00]
+
+v_mac_legacy_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0xf7,0x04,0x00,0x00]
+
v_mac_legacy_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x00]
-v_mac_legacy_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x0c,0xd2,0x01,0x05,0x00,0x00]
-
v_mac_legacy_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x0c,0xd2,0xff,0x05,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xcf,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xce,0x00,0x00]
+
+v_mac_legacy_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xd0,0x00,0x00]
+
+v_mac_legacy_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xd2,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xd1,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xd4,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xd3,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xd6,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xd5,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xd8,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xd7,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xda,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xd9,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xdc,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xdb,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xde,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xdd,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xf6,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xdf,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xf8,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xf7,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xfc,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xf9,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xfe,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xfd,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x00,0x01,0x00]
-v_mac_legacy_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xff,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x82,0x01,0x00]
-v_mac_legacy_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xfb,0x01,0x00]
+v_mac_legacy_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xe0,0x01,0x00]
-v_mac_legacy_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x02,0x00]
+v_mac_legacy_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xee,0x01,0x00]
-v_mac_legacy_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0xff,0x03,0x00]
+v_mac_legacy_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x02,0x00]
-v_mac_legacy_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x20]
+v_mac_legacy_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0xfe,0x03,0x00]
-v_mac_legacy_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x40]
+v_mac_legacy_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x20]
-v_mac_legacy_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x60]
+v_mac_legacy_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x40]
-v_mac_legacy_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x0c,0xd2,0x01,0x05,0x00,0x00]
+v_mac_legacy_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x60]
-v_mac_legacy_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x0c,0xd2,0x01,0x05,0x00,0x00]
+v_mac_legacy_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x0c,0xd2,0x80,0x04,0x00,0x00]
-v_mac_legacy_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x0c,0xd2,0x01,0x05,0x00,0x00]
+v_mac_legacy_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x0c,0xd2,0x80,0x04,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x0c,0xd2,0x01,0x05,0x00,0x00]
+v_mac_legacy_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x0c,0xd2,0x80,0x04,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x08]
+v_mac_legacy_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x0c,0xd2,0x80,0x04,0x00,0x00]
-v_mac_legacy_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x10]
+v_mac_legacy_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x08]
-v_mac_legacy_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x0c,0xd2,0x01,0x05,0x00,0x18]
+v_mac_legacy_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x10]
+
+v_mac_legacy_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x0c,0xd2,0x80,0x04,0x00,0x18]
v_mul_legacy_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x0e]
@@ -31661,92 +32133,113 @@ v_mul_legacy_f32 v5, v255, v2
v_mul_legacy_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x0e]
+v_mul_legacy_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x0e,0xd2,0x80,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0xc1,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0xf0,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0xf7,0x04,0x00,0x00]
+
v_mul_legacy_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x00]
-v_mul_legacy_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x0e,0xd2,0x01,0x05,0x00,0x00]
-
v_mul_legacy_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x0e,0xd2,0xff,0x05,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xcf,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xce,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xd0,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xd2,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xd1,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xd4,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xd3,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xd6,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xd5,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xd8,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xd7,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xda,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xd9,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xdc,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xdb,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xde,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xdd,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xf6,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xdf,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xf8,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xf7,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xfc,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xf9,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xfe,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xfd,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x00,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xff,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x82,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xfb,0x01,0x00]
+v_mul_legacy_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xe0,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x02,0x00]
+v_mul_legacy_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xee,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0xff,0x03,0x00]
+v_mul_legacy_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x02,0x00]
-v_mul_legacy_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x20]
+v_mul_legacy_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0xfe,0x03,0x00]
-v_mul_legacy_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x40]
+v_mul_legacy_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x20]
-v_mul_legacy_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x60]
+v_mul_legacy_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x40]
-v_mul_legacy_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x0e,0xd2,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x60]
-v_mul_legacy_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x0e,0xd2,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x0e,0xd2,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x0e,0xd2,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x0e,0xd2,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x0e,0xd2,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x0e,0xd2,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x08]
+v_mul_legacy_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x0e,0xd2,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x10]
+v_mul_legacy_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x08]
-v_mul_legacy_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x0e,0xd2,0x01,0x05,0x00,0x18]
+v_mul_legacy_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x10]
+
+v_mul_legacy_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x0e,0xd2,0x80,0x04,0x00,0x18]
v_mul_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x10]
@@ -31820,92 +32313,113 @@ v_mul_f32 v5, v255, v2
v_mul_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x10]
+v_mul_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x00]
+
+v_mul_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x10,0xd2,0x80,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x10,0xd2,0xc1,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x10,0xd2,0xf0,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x10,0xd2,0xf7,0x04,0x00,0x00]
+
v_mul_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x00]
-v_mul_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x10,0xd2,0x01,0x05,0x00,0x00]
-
v_mul_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x10,0xd2,0xff,0x05,0x00,0x00]
-v_mul_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xcf,0x00,0x00]
+v_mul_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xce,0x00,0x00]
+
+v_mul_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xd0,0x00,0x00]
+
+v_mul_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xd2,0x00,0x00]
-v_mul_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xd1,0x00,0x00]
+v_mul_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xd4,0x00,0x00]
-v_mul_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xd3,0x00,0x00]
+v_mul_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xd6,0x00,0x00]
-v_mul_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xd5,0x00,0x00]
+v_mul_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xd8,0x00,0x00]
-v_mul_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xd7,0x00,0x00]
+v_mul_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xda,0x00,0x00]
-v_mul_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xd9,0x00,0x00]
+v_mul_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xdc,0x00,0x00]
-v_mul_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xdb,0x00,0x00]
+v_mul_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xde,0x00,0x00]
-v_mul_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xdd,0x00,0x00]
+v_mul_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xf6,0x00,0x00]
-v_mul_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xdf,0x00,0x00]
+v_mul_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xf8,0x00,0x00]
-v_mul_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xf7,0x00,0x00]
+v_mul_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xfc,0x00,0x00]
-v_mul_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xf9,0x00,0x00]
+v_mul_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xfe,0x00,0x00]
-v_mul_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xfd,0x00,0x00]
+v_mul_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x00,0x01,0x00]
-v_mul_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xff,0x00,0x00]
+v_mul_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x82,0x01,0x00]
-v_mul_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xfb,0x01,0x00]
+v_mul_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xe0,0x01,0x00]
-v_mul_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x02,0x00]
+v_mul_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xee,0x01,0x00]
-v_mul_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0xff,0x03,0x00]
+v_mul_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x02,0x00]
-v_mul_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x20]
+v_mul_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0xfe,0x03,0x00]
-v_mul_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x40]
+v_mul_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x20]
-v_mul_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x60]
+v_mul_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x40]
-v_mul_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x10,0xd2,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x60]
-v_mul_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x10,0xd2,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x10,0xd2,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x10,0xd2,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x10,0xd2,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x10,0xd2,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x10,0xd2,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x08]
+v_mul_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x10,0xd2,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x10]
+v_mul_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x08]
-v_mul_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x10,0xd2,0x01,0x05,0x00,0x18]
+v_mul_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x10]
+
+v_mul_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x10,0xd2,0x80,0x04,0x00,0x18]
v_mul_i32_i24 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x12]
@@ -32579,92 +33093,113 @@ v_min_legacy_f32 v5, v255, v2
v_min_legacy_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x1a]
+v_min_legacy_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x00]
+
+v_min_legacy_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x1a,0xd2,0x80,0x04,0x00,0x00]
+
+v_min_legacy_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0xc1,0x04,0x00,0x00]
+
+v_min_legacy_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0xf0,0x04,0x00,0x00]
+
+v_min_legacy_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0xf7,0x04,0x00,0x00]
+
v_min_legacy_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x00]
-v_min_legacy_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x1a,0xd2,0x01,0x05,0x00,0x00]
-
v_min_legacy_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x1a,0xd2,0xff,0x05,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xcf,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xce,0x00,0x00]
+
+v_min_legacy_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xd0,0x00,0x00]
+
+v_min_legacy_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xd2,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xd1,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xd4,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xd3,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xd6,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xd5,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xd8,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xd7,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xda,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xd9,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xdc,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xdb,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xde,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xdd,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xf6,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xdf,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xf8,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xf7,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xfc,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xf9,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xfe,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xfd,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x00,0x01,0x00]
-v_min_legacy_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xff,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x82,0x01,0x00]
-v_min_legacy_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xfb,0x01,0x00]
+v_min_legacy_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xe0,0x01,0x00]
-v_min_legacy_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x02,0x00]
+v_min_legacy_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xee,0x01,0x00]
-v_min_legacy_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0xff,0x03,0x00]
+v_min_legacy_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x02,0x00]
-v_min_legacy_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x20]
+v_min_legacy_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0xfe,0x03,0x00]
-v_min_legacy_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x40]
+v_min_legacy_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x20]
-v_min_legacy_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x60]
+v_min_legacy_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x40]
-v_min_legacy_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x1a,0xd2,0x01,0x05,0x00,0x00]
+v_min_legacy_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x60]
-v_min_legacy_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x1a,0xd2,0x01,0x05,0x00,0x00]
+v_min_legacy_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x1a,0xd2,0x80,0x04,0x00,0x00]
-v_min_legacy_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x1a,0xd2,0x01,0x05,0x00,0x00]
+v_min_legacy_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x1a,0xd2,0x80,0x04,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x1a,0xd2,0x01,0x05,0x00,0x00]
+v_min_legacy_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x1a,0xd2,0x80,0x04,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x08]
+v_min_legacy_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x1a,0xd2,0x80,0x04,0x00,0x00]
-v_min_legacy_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x10]
+v_min_legacy_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x08]
-v_min_legacy_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x1a,0xd2,0x01,0x05,0x00,0x18]
+v_min_legacy_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x10]
+
+v_min_legacy_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x1a,0xd2,0x80,0x04,0x00,0x18]
v_max_legacy_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x1c]
@@ -32738,92 +33273,113 @@ v_max_legacy_f32 v5, v255, v2
v_max_legacy_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x1c]
+v_max_legacy_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x00]
+
+v_max_legacy_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x1c,0xd2,0x80,0x04,0x00,0x00]
+
+v_max_legacy_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0xc1,0x04,0x00,0x00]
+
+v_max_legacy_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0xf0,0x04,0x00,0x00]
+
+v_max_legacy_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0xf7,0x04,0x00,0x00]
+
v_max_legacy_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x00]
-v_max_legacy_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x1c,0xd2,0x01,0x05,0x00,0x00]
-
v_max_legacy_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x1c,0xd2,0xff,0x05,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xcf,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xce,0x00,0x00]
+
+v_max_legacy_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xd0,0x00,0x00]
+
+v_max_legacy_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xd2,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xd1,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xd4,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xd3,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xd6,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xd5,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xd8,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xd7,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xda,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xd9,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xdc,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xdb,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xde,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xdd,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xf6,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xdf,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xf8,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xf7,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xfc,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xf9,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xfe,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xfd,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x00,0x01,0x00]
-v_max_legacy_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xff,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x82,0x01,0x00]
-v_max_legacy_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xfb,0x01,0x00]
+v_max_legacy_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xe0,0x01,0x00]
-v_max_legacy_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x02,0x00]
+v_max_legacy_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xee,0x01,0x00]
-v_max_legacy_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0xff,0x03,0x00]
+v_max_legacy_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x02,0x00]
-v_max_legacy_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x20]
+v_max_legacy_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0xfe,0x03,0x00]
-v_max_legacy_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x40]
+v_max_legacy_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x20]
-v_max_legacy_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x60]
+v_max_legacy_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x40]
-v_max_legacy_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x1c,0xd2,0x01,0x05,0x00,0x00]
+v_max_legacy_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x60]
-v_max_legacy_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x1c,0xd2,0x01,0x05,0x00,0x00]
+v_max_legacy_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x1c,0xd2,0x80,0x04,0x00,0x00]
-v_max_legacy_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x1c,0xd2,0x01,0x05,0x00,0x00]
+v_max_legacy_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x1c,0xd2,0x80,0x04,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x1c,0xd2,0x01,0x05,0x00,0x00]
+v_max_legacy_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x1c,0xd2,0x80,0x04,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x08]
+v_max_legacy_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x1c,0xd2,0x80,0x04,0x00,0x00]
-v_max_legacy_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x10]
+v_max_legacy_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x08]
-v_max_legacy_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x1c,0xd2,0x01,0x05,0x00,0x18]
+v_max_legacy_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x10]
+
+v_max_legacy_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x1c,0xd2,0x80,0x04,0x00,0x18]
v_min_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x1e]
@@ -32897,92 +33453,113 @@ v_min_f32 v5, v255, v2
v_min_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x1e]
+v_min_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x00]
+
+v_min_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x1e,0xd2,0x80,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0xc1,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0xf0,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0xf7,0x04,0x00,0x00]
+
v_min_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x00]
-v_min_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x1e,0xd2,0x01,0x05,0x00,0x00]
-
v_min_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x1e,0xd2,0xff,0x05,0x00,0x00]
-v_min_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xcf,0x00,0x00]
+v_min_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xce,0x00,0x00]
+
+v_min_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xd0,0x00,0x00]
+
+v_min_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xd2,0x00,0x00]
-v_min_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xd1,0x00,0x00]
+v_min_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xd4,0x00,0x00]
-v_min_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xd3,0x00,0x00]
+v_min_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xd6,0x00,0x00]
-v_min_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xd5,0x00,0x00]
+v_min_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xd8,0x00,0x00]
-v_min_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xd7,0x00,0x00]
+v_min_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xda,0x00,0x00]
-v_min_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xd9,0x00,0x00]
+v_min_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xdc,0x00,0x00]
-v_min_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xdb,0x00,0x00]
+v_min_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xde,0x00,0x00]
-v_min_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xdd,0x00,0x00]
+v_min_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xf6,0x00,0x00]
-v_min_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xdf,0x00,0x00]
+v_min_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xf8,0x00,0x00]
-v_min_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xf7,0x00,0x00]
+v_min_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xfc,0x00,0x00]
-v_min_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xf9,0x00,0x00]
+v_min_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xfe,0x00,0x00]
-v_min_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xfd,0x00,0x00]
+v_min_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x00,0x01,0x00]
-v_min_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xff,0x00,0x00]
+v_min_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x82,0x01,0x00]
-v_min_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xfb,0x01,0x00]
+v_min_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xe0,0x01,0x00]
-v_min_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x02,0x00]
+v_min_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xee,0x01,0x00]
-v_min_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0xff,0x03,0x00]
+v_min_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x02,0x00]
-v_min_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x20]
+v_min_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0xfe,0x03,0x00]
-v_min_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x40]
+v_min_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x20]
-v_min_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x60]
+v_min_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x40]
-v_min_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x1e,0xd2,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x60]
-v_min_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x1e,0xd2,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x1e,0xd2,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x1e,0xd2,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x1e,0xd2,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x1e,0xd2,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x1e,0xd2,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x08]
+v_min_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x1e,0xd2,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x10]
+v_min_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x08]
-v_min_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x1e,0xd2,0x01,0x05,0x00,0x18]
+v_min_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x10]
+
+v_min_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x1e,0xd2,0x80,0x04,0x00,0x18]
v_max_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x20]
@@ -33056,92 +33633,113 @@ v_max_f32 v5, v255, v2
v_max_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x20]
+v_max_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x00]
+
+v_max_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x20,0xd2,0x80,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x20,0xd2,0xc1,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x20,0xd2,0xf0,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x20,0xd2,0xf7,0x04,0x00,0x00]
+
v_max_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x00]
-v_max_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x20,0xd2,0x01,0x05,0x00,0x00]
-
v_max_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x20,0xd2,0xff,0x05,0x00,0x00]
-v_max_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xcf,0x00,0x00]
+v_max_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xce,0x00,0x00]
+
+v_max_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xd0,0x00,0x00]
+
+v_max_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xd2,0x00,0x00]
-v_max_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xd1,0x00,0x00]
+v_max_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xd4,0x00,0x00]
-v_max_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xd3,0x00,0x00]
+v_max_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xd6,0x00,0x00]
-v_max_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xd5,0x00,0x00]
+v_max_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xd8,0x00,0x00]
-v_max_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xd7,0x00,0x00]
+v_max_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xda,0x00,0x00]
-v_max_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xd9,0x00,0x00]
+v_max_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xdc,0x00,0x00]
-v_max_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xdb,0x00,0x00]
+v_max_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xde,0x00,0x00]
-v_max_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xdd,0x00,0x00]
+v_max_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xf6,0x00,0x00]
-v_max_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xdf,0x00,0x00]
+v_max_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xf8,0x00,0x00]
-v_max_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xf7,0x00,0x00]
+v_max_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xfc,0x00,0x00]
-v_max_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xf9,0x00,0x00]
+v_max_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xfe,0x00,0x00]
-v_max_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xfd,0x00,0x00]
+v_max_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x00,0x01,0x00]
-v_max_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xff,0x00,0x00]
+v_max_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x82,0x01,0x00]
-v_max_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xfb,0x01,0x00]
+v_max_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xe0,0x01,0x00]
-v_max_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x02,0x00]
+v_max_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xee,0x01,0x00]
-v_max_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0xff,0x03,0x00]
+v_max_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x02,0x00]
-v_max_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x20]
+v_max_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0xfe,0x03,0x00]
-v_max_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x40]
+v_max_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x20]
-v_max_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x60]
+v_max_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x40]
-v_max_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x20,0xd2,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x60]
-v_max_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x20,0xd2,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x20,0xd2,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x20,0xd2,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x20,0xd2,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x20,0xd2,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x20,0xd2,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x08]
+v_max_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x20,0xd2,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x10]
+v_max_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x08]
-v_max_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x20,0xd2,0x01,0x05,0x00,0x18]
+v_max_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x10]
+
+v_max_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x20,0xd2,0x80,0x04,0x00,0x18]
v_min_i32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x22]
@@ -35315,92 +35913,113 @@ v_mac_f32 v5, v255, v2
v_mac_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x3e]
+v_mac_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x00]
+
+v_mac_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x3e,0xd2,0x80,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0xc1,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0xf0,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0xf7,0x04,0x00,0x00]
+
v_mac_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x00]
-v_mac_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x3e,0xd2,0x01,0x05,0x00,0x00]
-
v_mac_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x3e,0xd2,0xff,0x05,0x00,0x00]
-v_mac_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xcf,0x00,0x00]
+v_mac_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xce,0x00,0x00]
+
+v_mac_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xd0,0x00,0x00]
+
+v_mac_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xd2,0x00,0x00]
-v_mac_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xd1,0x00,0x00]
+v_mac_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xd4,0x00,0x00]
-v_mac_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xd3,0x00,0x00]
+v_mac_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xd6,0x00,0x00]
-v_mac_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xd5,0x00,0x00]
+v_mac_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xd8,0x00,0x00]
-v_mac_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xd7,0x00,0x00]
+v_mac_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xda,0x00,0x00]
-v_mac_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xd9,0x00,0x00]
+v_mac_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xdc,0x00,0x00]
-v_mac_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xdb,0x00,0x00]
+v_mac_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xde,0x00,0x00]
-v_mac_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xdd,0x00,0x00]
+v_mac_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xf6,0x00,0x00]
-v_mac_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xdf,0x00,0x00]
+v_mac_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xf8,0x00,0x00]
-v_mac_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xf7,0x00,0x00]
+v_mac_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xfc,0x00,0x00]
-v_mac_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xf9,0x00,0x00]
+v_mac_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xfe,0x00,0x00]
-v_mac_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xfd,0x00,0x00]
+v_mac_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x00,0x01,0x00]
-v_mac_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xff,0x00,0x00]
+v_mac_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x82,0x01,0x00]
-v_mac_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xfb,0x01,0x00]
+v_mac_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xe0,0x01,0x00]
-v_mac_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x02,0x00]
+v_mac_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xee,0x01,0x00]
-v_mac_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0xff,0x03,0x00]
+v_mac_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x02,0x00]
-v_mac_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x20]
+v_mac_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0xfe,0x03,0x00]
-v_mac_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x40]
+v_mac_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x20]
-v_mac_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x60]
+v_mac_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x40]
-v_mac_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x3e,0xd2,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x60]
-v_mac_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x3e,0xd2,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x3e,0xd2,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x3e,0xd2,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x3e,0xd2,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x08,0x3e,0xd2,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x3e,0xd2,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x08]
+v_mac_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x08,0x3e,0xd2,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x10]
+v_mac_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x08]
-v_mac_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x3e,0xd2,0x01,0x05,0x00,0x18]
+v_mac_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x10]
+
+v_mac_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x3e,0xd2,0x80,0x04,0x00,0x18]
v_madmk_f32 v5, 0, 0x11213141, v3
// CHECK: [0x80,0x06,0x0a,0x40,0x41,0x31,0x21,0x11]
@@ -36803,9 +37422,15 @@ v_ldexp_f32_e64 v5, 0, s2
v_ldexp_f32_e64 v255, 0, s2
// CHECK: [0xff,0x00,0x56,0xd2,0x80,0x04,0x00,0x00]
+v_ldexp_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x56,0xd2,0xc1,0x04,0x00,0x00]
+
v_ldexp_f32_e64 v5, 0.5, s2
// CHECK: [0x05,0x00,0x56,0xd2,0xf0,0x04,0x00,0x00]
+v_ldexp_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x56,0xd2,0xf7,0x04,0x00,0x00]
+
v_ldexp_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x56,0xd2,0x01,0x05,0x00,0x00]
@@ -36863,9 +37488,6 @@ v_ldexp_f32_e64 v5, 0, 0.5
v_ldexp_f32_e64 v5, 0, -4.0
// CHECK: [0x05,0x00,0x56,0xd2,0x80,0xee,0x01,0x00]
-v_ldexp_f32_e64 v5, 0, scc
-// CHECK: [0x05,0x00,0x56,0xd2,0x80,0xfa,0x01,0x00]
-
v_ldexp_f32_e64 v5, 0, v2
// CHECK: [0x05,0x00,0x56,0xd2,0x80,0x04,0x02,0x00]
@@ -36944,80 +37566,89 @@ v_cvt_pkaccum_u8_f32 v5, v255, v2
v_cvt_pkaccum_u8_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x58]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x58,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x58,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x58,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x58,0xd2,0xf7,0x04,0x00,0x00]
+
v_cvt_pkaccum_u8_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x58,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x58,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pkaccum_u8_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x58,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xcf,0x00,0x00]
-
-v_cvt_pkaccum_u8_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xd1,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xce,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xd3,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xd0,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xd2,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, 0
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0x01,0x01,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, -1
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0x83,0x01,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, 0.5
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xe1,0x01,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, -4.0
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xef,0x01,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pkaccum_u8_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x58,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pkaccum_u8_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x58,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pkaccum_u8_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x58,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkaccum_u8_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x58,0xd2,0x80,0x04,0x00,0x00]
v_cvt_pknorm_i16_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x5a]
@@ -37091,80 +37722,101 @@ v_cvt_pknorm_i16_f32 v5, v255, v2
v_cvt_pknorm_i16_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x5a]
+v_cvt_pknorm_i16_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x5a,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0xf7,0x04,0x00,0x00]
+
v_cvt_pknorm_i16_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x5a,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pknorm_i16_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x5a,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xce,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xd0,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xd1,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xd2,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xd3,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pknorm_i16_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pknorm_i16_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pknorm_i16_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x5a,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pknorm_i16_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pknorm_i16_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x5a,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pknorm_i16_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x5a,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x5a,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pknorm_i16_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x5a,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x5a,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x5a,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x5a,0xd2,0x80,0x04,0x00,0x00]
v_cvt_pknorm_u16_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x5c]
@@ -37238,80 +37890,101 @@ v_cvt_pknorm_u16_f32 v5, v255, v2
v_cvt_pknorm_u16_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x5c]
+v_cvt_pknorm_u16_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x5c,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0xf7,0x04,0x00,0x00]
+
v_cvt_pknorm_u16_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x5c,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pknorm_u16_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x5c,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xce,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xd0,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xd1,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xd2,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xd3,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pknorm_u16_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pknorm_u16_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pknorm_u16_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x5c,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pknorm_u16_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pknorm_u16_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x5c,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pknorm_u16_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x5c,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x5c,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pknorm_u16_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x5c,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x5c,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x5c,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x5c,0xd2,0x80,0x04,0x00,0x00]
v_cvt_pkrtz_f16_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x5e]
@@ -37385,80 +38058,101 @@ v_cvt_pkrtz_f16_f32 v5, v255, v2
v_cvt_pkrtz_f16_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x5e]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x5e,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0xf7,0x04,0x00,0x00]
+
v_cvt_pkrtz_f16_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x5e,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pkrtz_f16_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x5e,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, s103
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, s103
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xce,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xd0,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xd1,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xd2,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xd3,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pkrtz_f16_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x5e,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pkrtz_f16_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pkrtz_f16_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x5e,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pkrtz_f16_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x5e,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x5e,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pkrtz_f16_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x5e,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x5e,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x5e,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x5e,0xd2,0x80,0x04,0x00,0x00]
v_cvt_pk_u16_u32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x60]
@@ -37760,197 +38454,263 @@ v_cvt_pk_i16_i32_e64 v5, 0, v2
v_cvt_pk_i16_i32_e64 v5, 0, v255
// CHECK: [0x05,0x00,0x62,0xd2,0x80,0xfe,0x03,0x00]
-v_mad_legacy_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x80,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x67,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x68,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x69,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6a,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6b,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6c,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6d,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6e,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x6f,0x00,0x01,0x02]
+
+v_mad_legacy_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x7b,0x00,0x01,0x02]
-v_mad_legacy_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x7c,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x67,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x7e,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x68,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x7f,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x69,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x80,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6a,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0xc1,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6b,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0xf0,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6c,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0xf7,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6d,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x01,0x01,0x02]
-v_mad_legacy_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6e,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0xff,0x01,0x01,0x02]
-v_mad_legacy_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x6f,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x82,0x01,0x02]
-v_mad_legacy_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x7b,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0xe0,0x01,0x02]
-v_mad_legacy_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x7c,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0xee,0x01,0x02]
-v_mad_legacy_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x7e,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x02,0x02]
-v_mad_legacy_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x7f,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0xfe,0x03,0x02]
-v_mad_legacy_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0xfd,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x05,0x03]
-v_mad_legacy_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x05,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0xc1,0x03]
-v_mad_legacy_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0xff,0x05,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0xdd,0x03]
-v_mad_legacy_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0xfe,0x0f,0x04]
+v_mad_legacy_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x0d,0x04]
-v_mad_legacy_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0xfe,0x07]
+v_mad_legacy_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0xfd,0x07]
-v_mad_legacy_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x24]
+v_mad_legacy_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x22]
-v_mad_legacy_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x44]
+v_mad_legacy_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x42]
-v_mad_legacy_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x84]
+v_mad_legacy_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x82]
-v_mad_legacy_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0xe4]
+v_mad_legacy_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0xe2]
-v_mad_legacy_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x80,0xd2,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x80,0xd2,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x80,0xd2,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x80,0xd2,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x80,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x80,0xd2,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x0c]
+v_mad_legacy_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x0a]
-v_mad_legacy_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x14]
+v_mad_legacy_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x12]
-v_mad_legacy_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x04,0x0e,0x1c]
+v_mad_legacy_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x00,0x01,0x1a]
-v_mad_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x02]
-v_mad_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x82,0xd2,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x67,0x04,0x0e,0x04]
+v_mad_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x67,0x00,0x01,0x02]
-v_mad_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x68,0x04,0x0e,0x04]
+v_mad_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x68,0x00,0x01,0x02]
-v_mad_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x69,0x04,0x0e,0x04]
+v_mad_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x69,0x00,0x01,0x02]
-v_mad_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6a,0x04,0x0e,0x04]
+v_mad_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6a,0x00,0x01,0x02]
-v_mad_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6b,0x04,0x0e,0x04]
+v_mad_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6b,0x00,0x01,0x02]
-v_mad_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6c,0x04,0x0e,0x04]
+v_mad_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6c,0x00,0x01,0x02]
-v_mad_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6d,0x04,0x0e,0x04]
+v_mad_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6d,0x00,0x01,0x02]
-v_mad_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6e,0x04,0x0e,0x04]
+v_mad_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6e,0x00,0x01,0x02]
-v_mad_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x6f,0x04,0x0e,0x04]
+v_mad_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x6f,0x00,0x01,0x02]
-v_mad_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x7b,0x04,0x0e,0x04]
+v_mad_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x7b,0x00,0x01,0x02]
-v_mad_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x7c,0x04,0x0e,0x04]
+v_mad_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x7c,0x00,0x01,0x02]
-v_mad_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x7e,0x04,0x0e,0x04]
+v_mad_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x7e,0x00,0x01,0x02]
-v_mad_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x7f,0x04,0x0e,0x04]
+v_mad_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x7f,0x00,0x01,0x02]
-v_mad_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0xfd,0x04,0x0e,0x04]
+v_mad_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x80,0x00,0x01,0x02]
-v_mad_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x05,0x0e,0x04]
+v_mad_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0xc1,0x00,0x01,0x02]
-v_mad_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0xff,0x05,0x0e,0x04]
+v_mad_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0xf0,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0xfe,0x0f,0x04]
+v_mad_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0xf7,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0xfe,0x07]
+v_mad_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x01,0x01,0x02]
-v_mad_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x24]
+v_mad_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0xff,0x01,0x01,0x02]
-v_mad_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x44]
+v_mad_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x82,0x01,0x02]
-v_mad_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x84]
+v_mad_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0xe0,0x01,0x02]
-v_mad_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0xe4]
+v_mad_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0xee,0x01,0x02]
-v_mad_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x02,0x02]
-v_mad_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0xfe,0x03,0x02]
-v_mad_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x05,0x03]
-v_mad_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0xc1,0x03]
-v_mad_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x82,0xd2,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0xdd,0x03]
-v_mad_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x0c]
+v_mad_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x0d,0x04]
-v_mad_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x14]
+v_mad_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0xfd,0x07]
-v_mad_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x04,0x0e,0x1c]
+v_mad_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x22]
+
+v_mad_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x42]
+
+v_mad_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x82]
+
+v_mad_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0xe2]
+
+v_mad_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x82,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x82,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x82,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x82,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x82,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x0a]
+
+v_mad_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x12]
+
+v_mad_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x00,0x01,0x1a]
v_mad_i32_i24 v5, s1, 0, 0
// CHECK: [0x05,0x00,0x84,0xd2,0x01,0x00,0x01,0x02]
@@ -38138,389 +38898,521 @@ v_mad_u32_u24 v5, s1, 0, v3
v_mad_u32_u24 v5, s1, 0, v255
// CHECK: [0x05,0x00,0x86,0xd2,0x01,0x00,0xfd,0x07]
-v_cubeid_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubeid_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x88,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x67,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x68,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x69,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6a,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6b,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6c,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6d,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6e,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x6f,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x7b,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x7c,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x7e,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x7f,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0xc1,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0xf0,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0xf7,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x01,0x01,0x02]
+
+v_cubeid_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0xff,0x01,0x01,0x02]
+
+v_cubeid_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x82,0x01,0x02]
+
+v_cubeid_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xe0,0x01,0x02]
+
+v_cubeid_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xee,0x01,0x02]
+
+v_cubeid_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x02,0x02]
+
+v_cubeid_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xfe,0x03,0x02]
-v_cubeid_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x05,0x03]
-v_cubeid_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x67,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0xc1,0x03]
-v_cubeid_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x68,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0xdd,0x03]
-v_cubeid_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x69,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x0d,0x04]
-v_cubeid_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6a,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0xfd,0x07]
-v_cubeid_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6b,0x04,0x0e,0x04]
+v_cubeid_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x22]
-v_cubeid_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6c,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x42]
-v_cubeid_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6d,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x82]
-v_cubeid_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6e,0x04,0x0e,0x04]
+v_cubeid_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0xe2]
-v_cubeid_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x6f,0x04,0x0e,0x04]
+v_cubeid_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x88,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x7b,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x88,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x7c,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x88,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x7e,0x04,0x0e,0x04]
+v_cubeid_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x88,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x7f,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x88,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0xfd,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x0a]
-v_cubeid_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x12]
-v_cubeid_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0xff,0x05,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x00,0x01,0x1a]
-v_cubeid_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xfe,0x0f,0x04]
+v_cubesc_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0xfe,0x07]
+v_cubesc_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x24]
+v_cubesc_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x67,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x44]
+v_cubesc_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x68,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x84]
+v_cubesc_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x69,0x00,0x01,0x02]
-v_cubeid_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0xe4]
+v_cubesc_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6a,0x00,0x01,0x02]
-v_cubeid_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6b,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6c,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6d,0x00,0x01,0x02]
-v_cubeid_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6e,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x88,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x6f,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x0c]
+v_cubesc_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x7b,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x14]
+v_cubesc_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x7c,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x04,0x0e,0x1c]
+v_cubesc_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x7e,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x7f,0x00,0x01,0x02]
-v_cubesc_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x80,0x00,0x01,0x02]
-v_cubesc_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x67,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0xc1,0x00,0x01,0x02]
-v_cubesc_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x68,0x04,0x0e,0x04]
+v_cubesc_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0xf0,0x00,0x01,0x02]
-v_cubesc_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x69,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0xf7,0x00,0x01,0x02]
-v_cubesc_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6a,0x04,0x0e,0x04]
+v_cubesc_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x01,0x01,0x02]
-v_cubesc_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6b,0x04,0x0e,0x04]
+v_cubesc_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0xff,0x01,0x01,0x02]
-v_cubesc_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6c,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x82,0x01,0x02]
-v_cubesc_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6d,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0xe0,0x01,0x02]
-v_cubesc_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0xee,0x01,0x02]
-v_cubesc_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x6f,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x02,0x02]
-v_cubesc_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x7b,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0xfe,0x03,0x02]
-v_cubesc_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x7c,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x05,0x03]
-v_cubesc_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x7e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0xc1,0x03]
-v_cubesc_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x7f,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0xdd,0x03]
-v_cubesc_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0xfd,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x0d,0x04]
-v_cubesc_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x05,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0xfd,0x07]
-v_cubesc_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0xff,0x05,0x0e,0x04]
+v_cubesc_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x22]
-v_cubesc_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0xfe,0x0f,0x04]
+v_cubesc_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x42]
-v_cubesc_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0xfe,0x07]
+v_cubesc_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x82]
-v_cubesc_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x24]
+v_cubesc_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0xe2]
-v_cubesc_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x44]
+v_cubesc_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x84]
+v_cubesc_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0xe4]
+v_cubesc_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x8a,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x0a]
-v_cubesc_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x12]
-v_cubesc_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x8a,0xd2,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x00,0x01,0x1a]
-v_cubesc_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x0c]
+v_cubetc_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x14]
+v_cubetc_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x8a,0xd2,0x01,0x04,0x0e,0x1c]
+v_cubetc_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x67,0x00,0x01,0x02]
-v_cubetc_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x68,0x00,0x01,0x02]
-v_cubetc_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x69,0x00,0x01,0x02]
-v_cubetc_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x67,0x04,0x0e,0x04]
+v_cubetc_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6a,0x00,0x01,0x02]
-v_cubetc_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x68,0x04,0x0e,0x04]
+v_cubetc_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6b,0x00,0x01,0x02]
-v_cubetc_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x69,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6c,0x00,0x01,0x02]
-v_cubetc_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6a,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6d,0x00,0x01,0x02]
-v_cubetc_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6b,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6e,0x00,0x01,0x02]
-v_cubetc_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6c,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x6f,0x00,0x01,0x02]
-v_cubetc_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6d,0x04,0x0e,0x04]
+v_cubetc_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x7b,0x00,0x01,0x02]
-v_cubetc_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6e,0x04,0x0e,0x04]
+v_cubetc_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x7c,0x00,0x01,0x02]
-v_cubetc_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x6f,0x04,0x0e,0x04]
+v_cubetc_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x7e,0x00,0x01,0x02]
-v_cubetc_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x7b,0x04,0x0e,0x04]
+v_cubetc_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x7f,0x00,0x01,0x02]
-v_cubetc_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x7c,0x04,0x0e,0x04]
+v_cubetc_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x80,0x00,0x01,0x02]
-v_cubetc_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x7e,0x04,0x0e,0x04]
+v_cubetc_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0xc1,0x00,0x01,0x02]
-v_cubetc_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x7f,0x04,0x0e,0x04]
+v_cubetc_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0xf0,0x00,0x01,0x02]
-v_cubetc_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0xfd,0x04,0x0e,0x04]
+v_cubetc_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0xf7,0x00,0x01,0x02]
-v_cubetc_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x05,0x0e,0x04]
+v_cubetc_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x01,0x01,0x02]
-v_cubetc_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0xff,0x05,0x0e,0x04]
+v_cubetc_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0xff,0x01,0x01,0x02]
-v_cubetc_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0xfe,0x0f,0x04]
+v_cubetc_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x82,0x01,0x02]
-v_cubetc_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0xfe,0x07]
+v_cubetc_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0xe0,0x01,0x02]
-v_cubetc_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x24]
+v_cubetc_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0xee,0x01,0x02]
-v_cubetc_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x44]
+v_cubetc_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x02,0x02]
-v_cubetc_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x84]
+v_cubetc_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0xfe,0x03,0x02]
-v_cubetc_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0xe4]
+v_cubetc_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x05,0x03]
-v_cubetc_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0xc1,0x03]
-v_cubetc_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0xdd,0x03]
-v_cubetc_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x0d,0x04]
-v_cubetc_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0xfd,0x07]
-v_cubetc_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x8c,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x22]
-v_cubetc_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x0c]
+v_cubetc_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x42]
-v_cubetc_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x14]
+v_cubetc_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x82]
-v_cubetc_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x04,0x0e,0x1c]
+v_cubetc_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0xe2]
-v_cubema_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x67,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x68,0x04,0x0e,0x04]
+v_cubetc_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x69,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x8c,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6a,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x0a]
-v_cubema_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6b,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x12]
-v_cubema_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6c,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x8c,0xd2,0x01,0x00,0x01,0x1a]
-v_cubema_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6d,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6e,0x04,0x0e,0x04]
+v_cubema_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x8e,0xd2,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x6f,0x04,0x0e,0x04]
+v_cubema_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x67,0x00,0x01,0x02]
-v_cubema_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x7b,0x04,0x0e,0x04]
+v_cubema_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x68,0x00,0x01,0x02]
-v_cubema_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x7c,0x04,0x0e,0x04]
+v_cubema_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x69,0x00,0x01,0x02]
-v_cubema_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x7e,0x04,0x0e,0x04]
+v_cubema_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6a,0x00,0x01,0x02]
-v_cubema_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x7f,0x04,0x0e,0x04]
+v_cubema_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6b,0x00,0x01,0x02]
-v_cubema_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0xfd,0x04,0x0e,0x04]
+v_cubema_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6c,0x00,0x01,0x02]
-v_cubema_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x05,0x0e,0x04]
+v_cubema_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6d,0x00,0x01,0x02]
-v_cubema_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0xff,0x05,0x0e,0x04]
+v_cubema_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6e,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0xfe,0x0f,0x04]
+v_cubema_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x6f,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0xfe,0x07]
+v_cubema_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x7b,0x00,0x01,0x02]
-v_cubema_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x24]
+v_cubema_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x7c,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x44]
+v_cubema_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x7e,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x84]
+v_cubema_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x7f,0x00,0x01,0x02]
-v_cubema_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0xe4]
+v_cubema_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x80,0x00,0x01,0x02]
-v_cubema_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0xc1,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0xf0,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0xf7,0x00,0x01,0x02]
-v_cubema_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x01,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x8e,0xd2,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0xff,0x01,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x0c]
+v_cubema_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x82,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x14]
+v_cubema_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0xe0,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x0e,0x1c]
+v_cubema_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0xee,0x01,0x02]
+
+v_cubema_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x04,0x02,0x02]
+
+v_cubema_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0xfe,0x03,0x02]
+
+v_cubema_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x05,0x03]
+
+v_cubema_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0xc1,0x03]
+
+v_cubema_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0xdd,0x03]
+
+v_cubema_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x0d,0x04]
+
+v_cubema_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0xfd,0x07]
+
+v_cubema_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x22]
+
+v_cubema_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x42]
+
+v_cubema_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x82]
+
+v_cubema_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0xe2]
+
+v_cubema_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x8e,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x8e,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x8e,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x8e,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x8e,0xd2,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x0a]
+
+v_cubema_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x12]
+
+v_cubema_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x8e,0xd2,0x01,0x00,0x01,0x1a]
v_bfe_u32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0x90,0xd2,0x01,0x00,0x01,0x02]
@@ -38801,182 +39693,248 @@ v_bfi_b32 v5, s1, 0, v3
v_bfi_b32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x00,0xfd,0x07]
-v_fma_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x02]
+
+v_fma_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0x96,0xd2,0x01,0x00,0x01,0x02]
+
+v_fma_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x67,0x00,0x01,0x02]
+
+v_fma_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x68,0x00,0x01,0x02]
+
+v_fma_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x69,0x00,0x01,0x02]
+
+v_fma_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6a,0x00,0x01,0x02]
+
+v_fma_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6b,0x00,0x01,0x02]
+
+v_fma_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6c,0x00,0x01,0x02]
+
+v_fma_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6d,0x00,0x01,0x02]
+
+v_fma_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6e,0x00,0x01,0x02]
+
+v_fma_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x6f,0x00,0x01,0x02]
+
+v_fma_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x7b,0x00,0x01,0x02]
+
+v_fma_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x7c,0x00,0x01,0x02]
+
+v_fma_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x7e,0x00,0x01,0x02]
+
+v_fma_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x7f,0x00,0x01,0x02]
+
+v_fma_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x00,0x01,0x02]
+
+v_fma_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0xc1,0x00,0x01,0x02]
+
+v_fma_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0xf0,0x00,0x01,0x02]
-v_fma_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0xf7,0x00,0x01,0x02]
-v_fma_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x67,0x04,0x0e,0x04]
+v_fma_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x01,0x01,0x02]
-v_fma_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x68,0x04,0x0e,0x04]
+v_fma_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0xff,0x01,0x01,0x02]
-v_fma_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x69,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x82,0x01,0x02]
-v_fma_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6a,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xe0,0x01,0x02]
-v_fma_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6b,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xee,0x01,0x02]
-v_fma_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6c,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x02,0x02]
-v_fma_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6d,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xfe,0x03,0x02]
-v_fma_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6e,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x05,0x03]
-v_fma_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x6f,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0xc1,0x03]
-v_fma_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x7b,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0xdd,0x03]
-v_fma_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x7c,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x0d,0x04]
-v_fma_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x7e,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0xfd,0x07]
-v_fma_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x7f,0x04,0x0e,0x04]
+v_fma_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x22]
-v_fma_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0xfd,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x42]
-v_fma_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x0e,0x04]
+v_fma_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x82]
-v_fma_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0xff,0x05,0x0e,0x04]
+v_fma_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0xe2]
-v_fma_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xfe,0x0f,0x04]
+v_fma_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0x96,0xd2,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0xfe,0x07]
+v_fma_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0x96,0xd2,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x24]
+v_fma_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0x96,0xd2,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x44]
+v_fma_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0x96,0xd2,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x84]
+v_fma_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0x96,0xd2,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0xe4]
+v_fma_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x0a]
-v_fma_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x12]
-v_fma_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x00,0x01,0x1a]
-v_fma_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x02]
-v_fma_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f64 v[254:255], s[2:3], 0, 0
+// CHECK: [0xfe,0x00,0x98,0xd2,0x02,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0x96,0xd2,0x01,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[4:5], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x04,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x0c]
+v_fma_f64 v[5:6], s[102:103], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x66,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x14]
+v_fma_f64 v[5:6], flat_scratch, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x68,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x04,0x0e,0x1c]
+v_fma_f64 v[5:6], vcc, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x6a,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], tba, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x6c,0x00,0x01,0x02]
-v_fma_f64 v[254:255], s[2:3], v[2:3], v[3:4]
-// CHECK: [0xfe,0x00,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], tma, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x6e,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[4:5], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x04,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], ttmp[10:11], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x7a,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[102:103], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x66,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], exec, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x7e,0x00,0x01,0x02]
-v_fma_f64 v[5:6], flat_scratch, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x68,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], 0, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x80,0x00,0x01,0x02]
-v_fma_f64 v[5:6], vcc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x6a,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], -1, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0xc1,0x00,0x01,0x02]
-v_fma_f64 v[5:6], tba, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x6c,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], 0.5, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0xf0,0x00,0x01,0x02]
-v_fma_f64 v[5:6], tma, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x6e,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], -4.0, 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0xf7,0x00,0x01,0x02]
-v_fma_f64 v[5:6], ttmp[10:11], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x7a,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], v[1:2], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x01,0x01,0x01,0x02]
-v_fma_f64 v[5:6], exec, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x7e,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], v[254:255], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0xfe,0x01,0x01,0x02]
-v_fma_f64 v[5:6], scc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0xfd,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], -1, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x82,0x01,0x02]
-v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x01,0x05,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0.5, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0xe0,0x01,0x02]
-v_fma_f64 v[5:6], v[254:255], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0xfe,0x05,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], -4.0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0xee,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[254:255], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0xfc,0x0f,0x04]
+v_fma_f64 v[5:6], s[2:3], v[2:3], 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x02,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[254:255]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0xfa,0x07]
+v_fma_f64 v[5:6], s[2:3], v[254:255], 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0xfc,0x03,0x02]
-v_fma_f64 v[5:6], -s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x24]
+v_fma_f64 v[5:6], s[2:3], 0, -1
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x05,0x03]
-v_fma_f64 v[5:6], s[2:3], -v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x44]
+v_fma_f64 v[5:6], s[2:3], 0, 0.5
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0xc1,0x03]
-v_fma_f64 v[5:6], s[2:3], v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x84]
+v_fma_f64 v[5:6], s[2:3], 0, -4.0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0xdd,0x03]
-v_fma_f64 v[5:6], -s[2:3], -v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0xe4]
+v_fma_f64 v[5:6], s[2:3], 0, v[3:4]
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x0d,0x04]
-v_fma_f64 v[5:6], |s[2:3]|, v[2:3], v[3:4]
-// CHECK: [0x05,0x01,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0, v[254:255]
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0xf9,0x07]
-v_fma_f64 v[5:6], s[2:3], |v[2:3]|, v[3:4]
-// CHECK: [0x05,0x02,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], -s[2:3], 0, 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x22]
-v_fma_f64 v[5:6], s[2:3], v[2:3], |v[3:4]|
-// CHECK: [0x05,0x04,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], neg(0), 0
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x42]
-v_fma_f64 v[5:6], |s[2:3]|, |v[2:3]|, |v[3:4]|
-// CHECK: [0x05,0x07,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0, neg(0)
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x82]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] clamp
-// CHECK: [0x05,0x08,0x98,0xd2,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], -s[2:3], neg(0), neg(0)
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0xe2]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:2
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x0c]
+v_fma_f64 v[5:6], |s[2:3]|, 0, 0
+// CHECK: [0x05,0x01,0x98,0xd2,0x02,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:4
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x14]
+v_fma_f64 v[5:6], s[2:3], |0|, 0
+// CHECK: [0x05,0x02,0x98,0xd2,0x02,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] div:2
-// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x04,0x0e,0x1c]
+v_fma_f64 v[5:6], s[2:3], 0, |0|
+// CHECK: [0x05,0x04,0x98,0xd2,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], |s[2:3]|, |0|, |0|
+// CHECK: [0x05,0x07,0x98,0xd2,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 clamp
+// CHECK: [0x05,0x08,0x98,0xd2,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 mul:2
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x0a]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 mul:4
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x12]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 div:2
+// CHECK: [0x05,0x00,0x98,0xd2,0x02,0x00,0x01,0x1a]
v_lerp_u8 v5, s1, 0, 0
// CHECK: [0x05,0x00,0x9a,0xd2,0x01,0x00,0x01,0x02]
@@ -39239,197 +40197,263 @@ v_alignbyte_b32 v5, s1, 0, v3
v_alignbyte_b32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0x9e,0xd2,0x01,0x00,0xfd,0x07]
-v_mullit_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x02]
+
+v_mullit_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xa0,0xd2,0x01,0x00,0x01,0x02]
+
+v_mullit_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x67,0x00,0x01,0x02]
+
+v_mullit_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x68,0x00,0x01,0x02]
+
+v_mullit_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x69,0x00,0x01,0x02]
+
+v_mullit_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6a,0x00,0x01,0x02]
+
+v_mullit_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6b,0x00,0x01,0x02]
+
+v_mullit_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6c,0x00,0x01,0x02]
+
+v_mullit_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6d,0x00,0x01,0x02]
+
+v_mullit_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6e,0x00,0x01,0x02]
+
+v_mullit_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x6f,0x00,0x01,0x02]
+
+v_mullit_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x7b,0x00,0x01,0x02]
+
+v_mullit_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x7c,0x00,0x01,0x02]
+
+v_mullit_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x7e,0x00,0x01,0x02]
-v_mullit_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x7f,0x00,0x01,0x02]
-v_mullit_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x67,0x04,0x0e,0x04]
+v_mullit_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x80,0x00,0x01,0x02]
-v_mullit_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x68,0x04,0x0e,0x04]
+v_mullit_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0xc1,0x00,0x01,0x02]
-v_mullit_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x69,0x04,0x0e,0x04]
+v_mullit_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0xf0,0x00,0x01,0x02]
-v_mullit_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6a,0x04,0x0e,0x04]
+v_mullit_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0xf7,0x00,0x01,0x02]
-v_mullit_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6b,0x04,0x0e,0x04]
+v_mullit_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x01,0x01,0x02]
-v_mullit_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6c,0x04,0x0e,0x04]
+v_mullit_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0xff,0x01,0x01,0x02]
-v_mullit_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6d,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x82,0x01,0x02]
-v_mullit_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6e,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0xe0,0x01,0x02]
-v_mullit_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x6f,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0xee,0x01,0x02]
-v_mullit_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x7b,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x02,0x02]
-v_mullit_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x7c,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0xfe,0x03,0x02]
-v_mullit_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x7e,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x05,0x03]
-v_mullit_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x7f,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0xc1,0x03]
-v_mullit_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0xfd,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0xdd,0x03]
-v_mullit_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x05,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x0d,0x04]
-v_mullit_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0xff,0x05,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0xfd,0x07]
-v_mullit_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0xfe,0x0f,0x04]
+v_mullit_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x22]
-v_mullit_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0xfe,0x07]
+v_mullit_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x42]
-v_mullit_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x24]
+v_mullit_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x82]
-v_mullit_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x44]
+v_mullit_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0xe2]
-v_mullit_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x84]
+v_mullit_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xa0,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0xe4]
+v_mullit_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xa0,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xa0,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xa0,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0xa0,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x0a]
-v_mullit_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xa0,0xd2,0x01,0x04,0x0e,0x04]
+v_mullit_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x12]
-v_mullit_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x0c]
+v_mullit_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x00,0x01,0x1a]
-v_mullit_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x14]
+v_min3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x02]
-v_mullit_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xa0,0xd2,0x01,0x04,0x0e,0x1c]
+v_min3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xa2,0xd2,0x01,0x00,0x01,0x02]
-v_min3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x67,0x00,0x01,0x02]
-v_min3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x68,0x00,0x01,0x02]
-v_min3_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x67,0x04,0x0e,0x04]
+v_min3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x69,0x00,0x01,0x02]
-v_min3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x68,0x04,0x0e,0x04]
+v_min3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6a,0x00,0x01,0x02]
-v_min3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x69,0x04,0x0e,0x04]
+v_min3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6b,0x00,0x01,0x02]
-v_min3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6a,0x04,0x0e,0x04]
+v_min3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6c,0x00,0x01,0x02]
-v_min3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6b,0x04,0x0e,0x04]
+v_min3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6d,0x00,0x01,0x02]
-v_min3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6c,0x04,0x0e,0x04]
+v_min3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6e,0x00,0x01,0x02]
-v_min3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6d,0x04,0x0e,0x04]
+v_min3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x6f,0x00,0x01,0x02]
-v_min3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6e,0x04,0x0e,0x04]
+v_min3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x7b,0x00,0x01,0x02]
-v_min3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x6f,0x04,0x0e,0x04]
+v_min3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x7c,0x00,0x01,0x02]
-v_min3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x7b,0x04,0x0e,0x04]
+v_min3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x7e,0x00,0x01,0x02]
-v_min3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x7c,0x04,0x0e,0x04]
+v_min3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x7f,0x00,0x01,0x02]
-v_min3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x7e,0x04,0x0e,0x04]
+v_min3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x80,0x00,0x01,0x02]
-v_min3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x7f,0x04,0x0e,0x04]
+v_min3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0xc1,0x00,0x01,0x02]
-v_min3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0xfd,0x04,0x0e,0x04]
+v_min3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0xf0,0x00,0x01,0x02]
-v_min3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x05,0x0e,0x04]
+v_min3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0xf7,0x00,0x01,0x02]
-v_min3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0xff,0x05,0x0e,0x04]
+v_min3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x01,0x01,0x02]
-v_min3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0xfe,0x0f,0x04]
+v_min3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0xff,0x01,0x01,0x02]
-v_min3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0xfe,0x07]
+v_min3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x82,0x01,0x02]
-v_min3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x24]
+v_min3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0xe0,0x01,0x02]
-v_min3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x44]
+v_min3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0xee,0x01,0x02]
-v_min3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x84]
+v_min3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x02,0x02]
-v_min3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0xe4]
+v_min3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0xfe,0x03,0x02]
-v_min3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x05,0x03]
-v_min3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0xc1,0x03]
-v_min3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0xdd,0x03]
-v_min3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x0d,0x04]
-v_min3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xa2,0xd2,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0xfd,0x07]
-v_min3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x0c]
+v_min3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x22]
-v_min3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x14]
+v_min3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x42]
-v_min3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x04,0x0e,0x1c]
+v_min3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x82]
+
+v_min3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0xe2]
+
+v_min3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xa2,0xd2,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xa2,0xd2,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xa2,0xd2,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xa2,0xd2,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0xa2,0xd2,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x0a]
+
+v_min3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x12]
+
+v_min3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xa2,0xd2,0x01,0x00,0x01,0x1a]
v_min3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xa4,0xd2,0x01,0x00,0x01,0x02]
@@ -39617,101 +40641,134 @@ v_min3_u32 v5, s1, 0, v3
v_min3_u32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xa6,0xd2,0x01,0x00,0xfd,0x07]
-v_max3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x02]
+
+v_max3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xa8,0xd2,0x01,0x00,0x01,0x02]
+
+v_max3_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x67,0x00,0x01,0x02]
+
+v_max3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x68,0x00,0x01,0x02]
+
+v_max3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x69,0x00,0x01,0x02]
+
+v_max3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6a,0x00,0x01,0x02]
+
+v_max3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6b,0x00,0x01,0x02]
+
+v_max3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6c,0x00,0x01,0x02]
+
+v_max3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6d,0x00,0x01,0x02]
+
+v_max3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6e,0x00,0x01,0x02]
-v_max3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x6f,0x00,0x01,0x02]
-v_max3_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x67,0x04,0x0e,0x04]
+v_max3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x7b,0x00,0x01,0x02]
-v_max3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x68,0x04,0x0e,0x04]
+v_max3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x7c,0x00,0x01,0x02]
-v_max3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x69,0x04,0x0e,0x04]
+v_max3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x7e,0x00,0x01,0x02]
-v_max3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6a,0x04,0x0e,0x04]
+v_max3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x7f,0x00,0x01,0x02]
-v_max3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6b,0x04,0x0e,0x04]
+v_max3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x80,0x00,0x01,0x02]
-v_max3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6c,0x04,0x0e,0x04]
+v_max3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0xc1,0x00,0x01,0x02]
-v_max3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6d,0x04,0x0e,0x04]
+v_max3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0xf0,0x00,0x01,0x02]
-v_max3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6e,0x04,0x0e,0x04]
+v_max3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0xf7,0x00,0x01,0x02]
-v_max3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x6f,0x04,0x0e,0x04]
+v_max3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x01,0x01,0x02]
-v_max3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x7b,0x04,0x0e,0x04]
+v_max3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0xff,0x01,0x01,0x02]
-v_max3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x7c,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x82,0x01,0x02]
-v_max3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x7e,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0xe0,0x01,0x02]
-v_max3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x7f,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0xee,0x01,0x02]
-v_max3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0xfd,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x02,0x02]
-v_max3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
+v_max3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0xfe,0x03,0x02]
-v_max3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0xff,0x05,0x0e,0x04]
+v_max3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x05,0x03]
-v_max3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0xfe,0x0f,0x04]
+v_max3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0xc1,0x03]
-v_max3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0xfe,0x07]
+v_max3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0xdd,0x03]
-v_max3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x24]
+v_max3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x0d,0x04]
-v_max3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x44]
+v_max3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0xfd,0x07]
-v_max3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x84]
+v_max3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x22]
-v_max3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0xe4]
+v_max3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x42]
-v_max3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x82]
-v_max3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0xe2]
-v_max3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xa8,0xd2,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xa8,0xd2,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xa8,0xd2,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xa8,0xd2,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x0c]
+v_max3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xa8,0xd2,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x14]
+v_max3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0xa8,0xd2,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x04,0x0e,0x1c]
+v_max3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x0a]
+
+v_max3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x12]
+
+v_max3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xa8,0xd2,0x01,0x00,0x01,0x1a]
v_max3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xaa,0xd2,0x01,0x00,0x01,0x02]
@@ -39899,101 +40956,134 @@ v_max3_u32 v5, s1, 0, v3
v_max3_u32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xac,0xd2,0x01,0x00,0xfd,0x07]
-v_med3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xae,0xd2,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x67,0x00,0x01,0x02]
+
+v_med3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x68,0x00,0x01,0x02]
+
+v_med3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x69,0x00,0x01,0x02]
+
+v_med3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6a,0x00,0x01,0x02]
+
+v_med3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6b,0x00,0x01,0x02]
+
+v_med3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6c,0x00,0x01,0x02]
-v_med3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6d,0x00,0x01,0x02]
-v_med3_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x67,0x04,0x0e,0x04]
+v_med3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6e,0x00,0x01,0x02]
-v_med3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x68,0x04,0x0e,0x04]
+v_med3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x6f,0x00,0x01,0x02]
-v_med3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x69,0x04,0x0e,0x04]
+v_med3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x7b,0x00,0x01,0x02]
-v_med3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6a,0x04,0x0e,0x04]
+v_med3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x7c,0x00,0x01,0x02]
-v_med3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6b,0x04,0x0e,0x04]
+v_med3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x7e,0x00,0x01,0x02]
-v_med3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6c,0x04,0x0e,0x04]
+v_med3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x7f,0x00,0x01,0x02]
-v_med3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6d,0x04,0x0e,0x04]
+v_med3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x80,0x00,0x01,0x02]
-v_med3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6e,0x04,0x0e,0x04]
+v_med3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0xc1,0x00,0x01,0x02]
-v_med3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x6f,0x04,0x0e,0x04]
+v_med3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0xf0,0x00,0x01,0x02]
-v_med3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x7b,0x04,0x0e,0x04]
+v_med3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0xf7,0x00,0x01,0x02]
-v_med3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x7c,0x04,0x0e,0x04]
+v_med3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x01,0x01,0x02]
-v_med3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x7e,0x04,0x0e,0x04]
+v_med3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0xff,0x01,0x01,0x02]
-v_med3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x7f,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x82,0x01,0x02]
-v_med3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0xfd,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0xe0,0x01,0x02]
-v_med3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x05,0x0e,0x04]
+v_med3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0xee,0x01,0x02]
-v_med3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0xff,0x05,0x0e,0x04]
+v_med3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x02,0x02]
-v_med3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0xfe,0x0f,0x04]
+v_med3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0xfe,0x03,0x02]
-v_med3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0xfe,0x07]
+v_med3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x05,0x03]
-v_med3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x24]
+v_med3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0xc1,0x03]
-v_med3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x44]
+v_med3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0xdd,0x03]
-v_med3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x84]
+v_med3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x0d,0x04]
-v_med3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0xe4]
+v_med3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0xfd,0x07]
-v_med3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x22]
-v_med3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x42]
-v_med3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x82]
-v_med3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0xe2]
-v_med3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xae,0xd2,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xae,0xd2,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x0c]
+v_med3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xae,0xd2,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x14]
+v_med3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xae,0xd2,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x04,0x0e,0x1c]
+v_med3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xae,0xd2,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0xae,0xd2,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x0a]
+
+v_med3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x12]
+
+v_med3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xae,0xd2,0x01,0x00,0x01,0x1a]
v_med3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xb0,0xd2,0x01,0x00,0x01,0x02]
@@ -40565,11 +41655,14 @@ v_cvt_pk_u8_f32 v5, exec_hi, 0, 0
v_cvt_pk_u8_f32 v5, 0, 0, 0
// CHECK: [0x05,0x00,0xbc,0xd2,0x80,0x00,0x01,0x02]
+v_cvt_pk_u8_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xbc,0xd2,0xc1,0x00,0x01,0x02]
+
v_cvt_pk_u8_f32 v5, 0.5, 0, 0
// CHECK: [0x05,0x00,0xbc,0xd2,0xf0,0x00,0x01,0x02]
-v_cvt_pk_u8_f32 v5, scc, 0, 0
-// CHECK: [0x05,0x00,0xbc,0xd2,0xfd,0x00,0x01,0x02]
+v_cvt_pk_u8_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xbc,0xd2,0xf7,0x00,0x01,0x02]
v_cvt_pk_u8_f32 v5, v1, 0, 0
// CHECK: [0x05,0x00,0xbc,0xd2,0x01,0x01,0x01,0x02]
@@ -40607,182 +41700,248 @@ v_cvt_pk_u8_f32 v5, s1, 0, v3
v_cvt_pk_u8_f32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xbc,0xd2,0x01,0x00,0xfd,0x07]
-v_div_fixup_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xbe,0xd2,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, s103, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x67,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x68,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x69,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6a,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6b,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6c,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6d,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6e,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x6f,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x7b,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x7c,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x7e,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x7f,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x80,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0xc1,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0xf0,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0xf7,0x00,0x01,0x02]
+
+v_div_fixup_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x01,0x01,0x02]
+
+v_div_fixup_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0xff,0x01,0x01,0x02]
+
+v_div_fixup_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x82,0x01,0x02]
+
+v_div_fixup_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0xe0,0x01,0x02]
-v_div_fixup_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0xee,0x01,0x02]
-v_div_fixup_f32 v5, s103, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x67,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x02,0x02]
-v_div_fixup_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x68,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0xfe,0x03,0x02]
-v_div_fixup_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x69,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x05,0x03]
-v_div_fixup_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6a,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0xc1,0x03]
-v_div_fixup_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6b,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0xdd,0x03]
-v_div_fixup_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6c,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x0d,0x04]
-v_div_fixup_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6d,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0xfd,0x07]
-v_div_fixup_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x22]
-v_div_fixup_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x6f,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x42]
-v_div_fixup_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x7b,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x82]
-v_div_fixup_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x7c,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0xe2]
-v_div_fixup_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x7e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xbe,0xd2,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x7f,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xbe,0xd2,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0xfd,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xbe,0xd2,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fixup_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xbe,0xd2,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0xff,0x05,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x08,0xbe,0xd2,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0xfe,0x0f,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x0a]
-v_div_fixup_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0xfe,0x07]
+v_div_fixup_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x12]
-v_div_fixup_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x24]
+v_div_fixup_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x00,0x01,0x1a]
-v_div_fixup_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x44]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x84]
+v_div_fixup_f64 v[254:255], s[2:3], 0, 0
+// CHECK: [0xfe,0x00,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0xe4]
+v_div_fixup_f64 v[5:6], s[4:5], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x04,0x00,0x01,0x02]
-v_div_fixup_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[102:103], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x66,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], flat_scratch, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x68,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], vcc, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x6a,0x00,0x01,0x02]
-v_div_fixup_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], tba, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x6c,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xbe,0xd2,0x01,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], tma, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x6e,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x0c]
+v_div_fixup_f64 v[5:6], ttmp[10:11], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x7a,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x14]
+v_div_fixup_f64 v[5:6], exec, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x7e,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xbe,0xd2,0x01,0x04,0x0e,0x1c]
+v_div_fixup_f64 v[5:6], 0, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x80,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], -1, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0xc1,0x00,0x01,0x02]
-v_div_fixup_f64 v[254:255], s[2:3], v[2:3], v[3:4]
-// CHECK: [0xfe,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0xf0,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[4:5], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x04,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0xf7,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[102:103], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x66,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], v[1:2], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x01,0x01,0x01,0x02]
-v_div_fixup_f64 v[5:6], flat_scratch, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x68,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], v[254:255], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0xfe,0x01,0x01,0x02]
-v_div_fixup_f64 v[5:6], vcc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x6a,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], -1, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x82,0x01,0x02]
-v_div_fixup_f64 v[5:6], tba, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x6c,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0.5, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0xe0,0x01,0x02]
-v_div_fixup_f64 v[5:6], tma, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x6e,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], -4.0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0xee,0x01,0x02]
-v_div_fixup_f64 v[5:6], ttmp[10:11], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x7a,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], v[2:3], 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x02,0x02]
-v_div_fixup_f64 v[5:6], exec, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x7e,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], v[254:255], 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0xfc,0x03,0x02]
-v_div_fixup_f64 v[5:6], scc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0xfd,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, -1
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x05,0x03]
-v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0.5
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0xc1,0x03]
-v_div_fixup_f64 v[5:6], v[254:255], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0xfe,0x05,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, -4.0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0xdd,0x03]
-v_div_fixup_f64 v[5:6], s[2:3], v[254:255], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0xfc,0x0f,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, v[3:4]
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x0d,0x04]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[254:255]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0xfa,0x07]
+v_div_fixup_f64 v[5:6], s[2:3], 0, v[254:255]
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0xf9,0x07]
-v_div_fixup_f64 v[5:6], -s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x24]
+v_div_fixup_f64 v[5:6], -s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x22]
-v_div_fixup_f64 v[5:6], s[2:3], -v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x44]
+v_div_fixup_f64 v[5:6], s[2:3], neg(0), 0
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x42]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x84]
+v_div_fixup_f64 v[5:6], s[2:3], 0, neg(0)
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x82]
-v_div_fixup_f64 v[5:6], -s[2:3], -v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0xe4]
+v_div_fixup_f64 v[5:6], -s[2:3], neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0xe2]
-v_div_fixup_f64 v[5:6], |s[2:3]|, v[2:3], v[3:4]
-// CHECK: [0x05,0x01,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], |s[2:3]|, 0, 0
+// CHECK: [0x05,0x01,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], |v[2:3]|, v[3:4]
-// CHECK: [0x05,0x02,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], |0|, 0
+// CHECK: [0x05,0x02,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], |v[3:4]|
-// CHECK: [0x05,0x04,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, |0|
+// CHECK: [0x05,0x04,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], |s[2:3]|, |v[2:3]|, |v[3:4]|
-// CHECK: [0x05,0x07,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], |s[2:3]|, |0|, |0|
+// CHECK: [0x05,0x07,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] clamp
-// CHECK: [0x05,0x08,0xc0,0xd2,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 clamp
+// CHECK: [0x05,0x08,0xc0,0xd2,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:2
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x0c]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x0a]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:4
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x14]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x12]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] div:2
-// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x04,0x0e,0x1c]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 div:2
+// CHECK: [0x05,0x00,0xc0,0xd2,0x02,0x00,0x01,0x1a]
v_lshl_b64 v[5:6], 0, s2
// CHECK: [0x05,0x00,0xc2,0xd2,0x80,0x04,0x00,0x00]
@@ -41024,12 +42183,36 @@ v_add_f64 v[5:6], s[4:5], s[4:5]
v_add_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0xc8,0xd2,0x04,0x08,0x00,0x00]
+v_add_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0xc8,0xd2,0x80,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0xc8,0xd2,0xc1,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0xc8,0xd2,0xf0,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0xc8,0xd2,0xf7,0x08,0x00,0x00]
+
v_add_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0xc8,0xd2,0x01,0x09,0x00,0x00]
v_add_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0xc8,0xd2,0xfe,0x09,0x00,0x00]
+v_add_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0xc8,0xd2,0x04,0x00,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0xc8,0xd2,0x04,0x82,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0xc8,0xd2,0x04,0xe0,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0xc8,0xd2,0x04,0xee,0x01,0x00]
+
v_add_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0xc8,0xd2,0x04,0x04,0x02,0x00]
@@ -41072,12 +42255,36 @@ v_mul_f64 v[5:6], s[4:5], s[4:5]
v_mul_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0xca,0xd2,0x04,0x08,0x00,0x00]
+v_mul_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0xca,0xd2,0x80,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0xca,0xd2,0xc1,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0xca,0xd2,0xf0,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0xca,0xd2,0xf7,0x08,0x00,0x00]
+
v_mul_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0xca,0xd2,0x01,0x09,0x00,0x00]
v_mul_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0xca,0xd2,0xfe,0x09,0x00,0x00]
+v_mul_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0xca,0xd2,0x04,0x00,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0xca,0xd2,0x04,0x82,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0xca,0xd2,0x04,0xe0,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0xca,0xd2,0x04,0xee,0x01,0x00]
+
v_mul_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0xca,0xd2,0x04,0x04,0x02,0x00]
@@ -41120,12 +42327,36 @@ v_min_f64 v[5:6], s[4:5], s[4:5]
v_min_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0xcc,0xd2,0x04,0x08,0x00,0x00]
+v_min_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0xcc,0xd2,0x80,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0xcc,0xd2,0xc1,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0xcc,0xd2,0xf0,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0xcc,0xd2,0xf7,0x08,0x00,0x00]
+
v_min_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0xcc,0xd2,0x01,0x09,0x00,0x00]
v_min_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0xcc,0xd2,0xfe,0x09,0x00,0x00]
+v_min_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0xcc,0xd2,0x04,0x00,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0xcc,0xd2,0x04,0x82,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0xcc,0xd2,0x04,0xe0,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0xcc,0xd2,0x04,0xee,0x01,0x00]
+
v_min_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0xcc,0xd2,0x04,0x04,0x02,0x00]
@@ -41168,12 +42399,36 @@ v_max_f64 v[5:6], s[4:5], s[4:5]
v_max_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0xce,0xd2,0x04,0x08,0x00,0x00]
+v_max_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0xce,0xd2,0x80,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0xce,0xd2,0xc1,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0xce,0xd2,0xf0,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0xce,0xd2,0xf7,0x08,0x00,0x00]
+
v_max_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0xce,0xd2,0x01,0x09,0x00,0x00]
v_max_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0xce,0xd2,0xfe,0x09,0x00,0x00]
+v_max_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0xce,0xd2,0x04,0x00,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0xce,0xd2,0x04,0x82,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0xce,0xd2,0x04,0xe0,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0xce,0xd2,0x04,0xee,0x01,0x00]
+
v_max_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0xce,0xd2,0x04,0x04,0x02,0x00]
@@ -41216,9 +42471,15 @@ v_ldexp_f64 v[5:6], 0, s2
v_ldexp_f64 v[254:255], 0, s2
// CHECK: [0xfe,0x00,0xd0,0xd2,0x80,0x04,0x00,0x00]
+v_ldexp_f64 v[5:6], -1, s2
+// CHECK: [0x05,0x00,0xd0,0xd2,0xc1,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], 0.5, s2
// CHECK: [0x05,0x00,0xd0,0xd2,0xf0,0x04,0x00,0x00]
+v_ldexp_f64 v[5:6], -4.0, s2
+// CHECK: [0x05,0x00,0xd0,0xd2,0xf7,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], v[1:2], s2
// CHECK: [0x05,0x00,0xd0,0xd2,0x01,0x05,0x00,0x00]
@@ -41276,15 +42537,18 @@ v_ldexp_f64 v[5:6], 0, 0.5
v_ldexp_f64 v[5:6], 0, -4.0
// CHECK: [0x05,0x00,0xd0,0xd2,0x80,0xee,0x01,0x00]
-v_ldexp_f64 v[5:6], 0, scc
-// CHECK: [0x05,0x00,0xd0,0xd2,0x80,0xfa,0x01,0x00]
-
v_ldexp_f64 v[5:6], 0, v2
// CHECK: [0x05,0x00,0xd0,0xd2,0x80,0x04,0x02,0x00]
v_ldexp_f64 v[5:6], 0, v255
// CHECK: [0x05,0x00,0xd0,0xd2,0x80,0xfe,0x03,0x00]
+v_ldexp_f64 v[5:6], neg(0), s2
+// CHECK: [0x05,0x00,0xd0,0xd2,0x80,0x04,0x00,0x20]
+
+v_ldexp_f64 v[5:6], |0|, s2
+// CHECK: [0x05,0x01,0xd0,0xd2,0x80,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], 0, s2 clamp
// CHECK: [0x05,0x08,0xd0,0xd2,0x80,0x04,0x00,0x00]
@@ -41780,56 +43044,92 @@ v_div_scale_f64 v[5:6], vcc, s[2:3], 0, v[3:4]
v_div_scale_f64 v[5:6], vcc, s[2:3], 0, v[254:255]
// CHECK: [0x05,0x6a,0xdc,0xd2,0x02,0x00,0xf9,0x07]
-v_div_fmas_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v255, 0, 0, 0
+// CHECK: [0xff,0x00,0xde,0xd2,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0xc1,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0xf0,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0xf7,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x01,0x01,0x02]
+
+v_div_fmas_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0xff,0x01,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, -1, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x82,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0.5, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0xe0,0x01,0x02]
-v_div_fmas_f32 v255, v1, v2, v3
-// CHECK: [0xff,0x00,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, -4.0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0xee,0x01,0x02]
-v_div_fmas_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd2,0xff,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, v2, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x04,0x02,0x02]
-v_div_fmas_f32 v5, v1, v255, v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0xff,0x0f,0x04]
+v_div_fmas_f32 v5, 0, v255, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0xfe,0x03,0x02]
-v_div_fmas_f32 v5, v1, v2, v255
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0xfe,0x07]
+v_div_fmas_f32 v5, 0, 0, -1
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x05,0x03]
-v_div_fmas_f32 v5, -v1, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x24]
+v_div_fmas_f32 v5, 0, 0, 0.5
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0xc1,0x03]
-v_div_fmas_f32 v5, v1, -v2, v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x44]
+v_div_fmas_f32 v5, 0, 0, -4.0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0xdd,0x03]
-v_div_fmas_f32 v5, v1, v2, -v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x84]
+v_div_fmas_f32 v5, 0, 0, v3
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x0d,0x04]
-v_div_fmas_f32 v5, -v1, -v2, -v3
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0xe4]
+v_div_fmas_f32 v5, 0, 0, v255
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0xfd,0x07]
-v_div_fmas_f32 v5, |v1|, v2, v3
-// CHECK: [0x05,0x01,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, neg(0), 0, 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x22]
-v_div_fmas_f32 v5, v1, |v2|, v3
-// CHECK: [0x05,0x02,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, neg(0), 0
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x42]
-v_div_fmas_f32 v5, v1, v2, |v3|
-// CHECK: [0x05,0x04,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, neg(0)
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x82]
-v_div_fmas_f32 v5, |v1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, neg(0), neg(0), neg(0)
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0xe2]
-v_div_fmas_f32 v5, v1, v2, v3 clamp
-// CHECK: [0x05,0x08,0xde,0xd2,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, |0|, 0, 0
+// CHECK: [0x05,0x01,0xde,0xd2,0x80,0x00,0x01,0x02]
-v_div_fmas_f32 v5, v1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x0c]
+v_div_fmas_f32 v5, 0, |0|, 0
+// CHECK: [0x05,0x02,0xde,0xd2,0x80,0x00,0x01,0x02]
-v_div_fmas_f32 v5, v1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x14]
+v_div_fmas_f32 v5, 0, 0, |0|
+// CHECK: [0x05,0x04,0xde,0xd2,0x80,0x00,0x01,0x02]
-v_div_fmas_f32 v5, v1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xde,0xd2,0x01,0x05,0x0e,0x1c]
+v_div_fmas_f32 v5, |0|, |0|, |0|
+// CHECK: [0x05,0x07,0xde,0xd2,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0, 0 clamp
+// CHECK: [0x05,0x08,0xde,0xd2,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x0a]
+
+v_div_fmas_f32 v5, 0, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x12]
+
+v_div_fmas_f32 v5, 0, 0, 0 div:2
+// CHECK: [0x05,0x00,0xde,0xd2,0x80,0x00,0x01,0x1a]
v_div_fmas_f64 v[5:6], vcc, vcc, vcc
// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0xa8,0x01]
@@ -41837,18 +43137,54 @@ v_div_fmas_f64 v[5:6], vcc, vcc, vcc
v_div_fmas_f64 v[254:255], vcc, vcc, vcc
// CHECK: [0xfe,0x00,0xe0,0xd2,0x6a,0xd4,0xa8,0x01]
+v_div_fmas_f64 v[5:6], 0, vcc, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0x80,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], -1, vcc, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0xc1,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], 0.5, vcc, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0xf0,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], -4.0, vcc, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0xf7,0xd4,0xa8,0x01]
+
v_div_fmas_f64 v[5:6], v[1:2], vcc, vcc
// CHECK: [0x05,0x00,0xe0,0xd2,0x01,0xd5,0xa8,0x01]
v_div_fmas_f64 v[5:6], v[254:255], vcc, vcc
// CHECK: [0x05,0x00,0xe0,0xd2,0xfe,0xd5,0xa8,0x01]
+v_div_fmas_f64 v[5:6], vcc, 0, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0x00,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, -1, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0x82,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, 0.5, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xe0,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, -4.0, vcc
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xee,0xa9,0x01]
+
v_div_fmas_f64 v[5:6], vcc, v[2:3], vcc
// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0x04,0xaa,0x01]
v_div_fmas_f64 v[5:6], vcc, v[254:255], vcc
// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xfc,0xab,0x01]
+v_div_fmas_f64 v[5:6], vcc, vcc, 0
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0x00,0x02]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, -1
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0x04,0x03]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, 0.5
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0xc0,0x03]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, -4.0
+// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0xdc,0x03]
+
v_div_fmas_f64 v[5:6], vcc, vcc, v[3:4]
// CHECK: [0x05,0x00,0xe0,0xd2,0x6a,0xd4,0x0c,0x04]
@@ -42092,9 +43428,15 @@ v_trig_preop_f64 v[5:6], 0, s2
v_trig_preop_f64 v[254:255], 0, s2
// CHECK: [0xfe,0x00,0xe8,0xd2,0x80,0x04,0x00,0x00]
+v_trig_preop_f64 v[5:6], -1, s2
+// CHECK: [0x05,0x00,0xe8,0xd2,0xc1,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], 0.5, s2
// CHECK: [0x05,0x00,0xe8,0xd2,0xf0,0x04,0x00,0x00]
+v_trig_preop_f64 v[5:6], -4.0, s2
+// CHECK: [0x05,0x00,0xe8,0xd2,0xf7,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], v[1:2], s2
// CHECK: [0x05,0x00,0xe8,0xd2,0x01,0x05,0x00,0x00]
@@ -42152,15 +43494,18 @@ v_trig_preop_f64 v[5:6], 0, 0.5
v_trig_preop_f64 v[5:6], 0, -4.0
// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0xee,0x01,0x00]
-v_trig_preop_f64 v[5:6], 0, scc
-// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0xfa,0x01,0x00]
-
v_trig_preop_f64 v[5:6], 0, v2
// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0x04,0x02,0x00]
v_trig_preop_f64 v[5:6], 0, v255
// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0xfe,0x03,0x00]
+v_trig_preop_f64 v[5:6], neg(0), s2
+// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0x04,0x00,0x20]
+
+v_trig_preop_f64 v[5:6], |0|, s2
+// CHECK: [0x05,0x01,0xe8,0xd2,0x80,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], 0, s2 clamp
// CHECK: [0x05,0x08,0xe8,0xd2,0x80,0x04,0x00,0x00]
@@ -42173,6 +43518,234 @@ v_trig_preop_f64 v[5:6], 0, s2 mul:4
v_trig_preop_f64 v[5:6], 0, s2 div:2
// CHECK: [0x05,0x00,0xe8,0xd2,0x80,0x04,0x00,0x18]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[254:255], s[12:13], s1, 0, 0
+// CHECK: [0xfe,0x0c,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[14:15], s1, 0, 0
+// CHECK: [0x05,0x0e,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[102:103], s1, 0, 0
+// CHECK: [0x05,0x66,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], flat_scratch, s1, 0, 0
+// CHECK: [0x05,0x68,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], vcc, s1, 0, 0
+// CHECK: [0x05,0x6a,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], tba, s1, 0, 0
+// CHECK: [0x05,0x6c,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], tma, s1, 0, 0
+// CHECK: [0x05,0x6e,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], ttmp[10:11], s1, 0, 0
+// CHECK: [0x05,0x7a,0xec,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s103, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x67,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x68,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x69,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], vcc_lo, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6a,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], vcc_hi, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6b,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tba_lo, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6c,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tba_hi, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6d,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tma_lo, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6e,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tma_hi, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x6f,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], ttmp11, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x7b,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], m0, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x7c,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_lo, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x7e,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_hi, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x7f,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], 0, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x80,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], -1, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0xc1,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], 0.5, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0xf0,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], -4.0, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0xf7,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], v1, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x01,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], v255, 0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0xff,0x01,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, -1, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x82,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0.5, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0xe0,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, -4.0, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0xee,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, v2, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x04,0x02,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, v255, 0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0xfe,0x03,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, -1
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0x05,0x03]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, 0.5
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0xc1,0x03]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, -4.0
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0xdd,0x03]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, v[3:4]
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0x0d,0x04]
+
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, v[254:255]
+// CHECK: [0x05,0x0c,0xec,0xd2,0x01,0x00,0xf9,0x07]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[254:255], s[12:13], s1, 0, 0
+// CHECK: [0xfe,0x0c,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[14:15], s1, 0, 0
+// CHECK: [0x05,0x0e,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[102:103], s1, 0, 0
+// CHECK: [0x05,0x66,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], flat_scratch, s1, 0, 0
+// CHECK: [0x05,0x68,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], vcc, s1, 0, 0
+// CHECK: [0x05,0x6a,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], tba, s1, 0, 0
+// CHECK: [0x05,0x6c,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], tma, s1, 0, 0
+// CHECK: [0x05,0x6e,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], ttmp[10:11], s1, 0, 0
+// CHECK: [0x05,0x7a,0xee,0xd2,0x01,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s103, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x67,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x68,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x69,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], vcc_lo, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6a,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], vcc_hi, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6b,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tba_lo, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6c,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tba_hi, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6d,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tma_lo, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6e,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tma_hi, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x6f,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], ttmp11, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x7b,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], m0, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x7c,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_lo, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x7e,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_hi, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x7f,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], 0, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x80,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], -1, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0xc1,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], 0.5, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0xf0,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], -4.0, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0xf7,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], v1, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x01,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], v255, 0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0xff,0x01,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, -1, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x82,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0.5, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0xe0,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, -4.0, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0xee,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, v2, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x04,0x02,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, v255, 0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0xfe,0x03,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, -1
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0x05,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, 0.5
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0xc1,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, -4.0
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0xdd,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, v[3:4]
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0x0d,0x04]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, v[254:255]
+// CHECK: [0x05,0x0c,0xee,0xd2,0x01,0x00,0xf9,0x07]
+
v_cmp_f_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x00,0x7c]
@@ -42266,9 +43839,15 @@ v_cmp_f_f32_e64 tma, 0, s2
v_cmp_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x00,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x00,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x00,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x00,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x00,0xd0,0x01,0x05,0x00,0x00]
@@ -42317,11 +43896,14 @@ v_cmp_f_f32_e64 s[10:11], 0, exec_hi
v_cmp_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0xee,0x01,0x00]
v_cmp_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x04,0x02,0x00]
@@ -42329,9 +43911,15 @@ v_cmp_f_f32_e64 s[10:11], 0, v2
v_cmp_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x00,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x02,0x7c]
@@ -42425,9 +44013,15 @@ v_cmp_lt_f32_e64 tma, 0, s2
v_cmp_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x02,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x02,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x02,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x02,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x02,0xd0,0x01,0x05,0x00,0x00]
@@ -42476,11 +44070,14 @@ v_cmp_lt_f32_e64 s[10:11], 0, exec_hi
v_cmp_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x04,0x02,0x00]
@@ -42488,9 +44085,15 @@ v_cmp_lt_f32_e64 s[10:11], 0, v2
v_cmp_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x02,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_eq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x04,0x7c]
@@ -42584,9 +44187,15 @@ v_cmp_eq_f32_e64 tma, 0, s2
v_cmp_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x04,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x04,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x04,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x04,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x04,0xd0,0x01,0x05,0x00,0x00]
@@ -42635,11 +44244,14 @@ v_cmp_eq_f32_e64 s[10:11], 0, exec_hi
v_cmp_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0xee,0x01,0x00]
v_cmp_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x04,0x02,0x00]
@@ -42647,9 +44259,15 @@ v_cmp_eq_f32_e64 s[10:11], 0, v2
v_cmp_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x04,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_le_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x06,0x7c]
@@ -42743,9 +44361,15 @@ v_cmp_le_f32_e64 tma, 0, s2
v_cmp_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x06,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x06,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x06,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x06,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x06,0xd0,0x01,0x05,0x00,0x00]
@@ -42794,11 +44418,14 @@ v_cmp_le_f32_e64 s[10:11], 0, exec_hi
v_cmp_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0xee,0x01,0x00]
v_cmp_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x04,0x02,0x00]
@@ -42806,9 +44433,15 @@ v_cmp_le_f32_e64 s[10:11], 0, v2
v_cmp_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x06,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_gt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x08,0x7c]
@@ -42902,9 +44535,15 @@ v_cmp_gt_f32_e64 tma, 0, s2
v_cmp_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x08,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x08,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x08,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x08,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x08,0xd0,0x01,0x05,0x00,0x00]
@@ -42953,11 +44592,14 @@ v_cmp_gt_f32_e64 s[10:11], 0, exec_hi
v_cmp_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0xee,0x01,0x00]
v_cmp_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x04,0x02,0x00]
@@ -42965,9 +44607,15 @@ v_cmp_gt_f32_e64 s[10:11], 0, v2
v_cmp_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x08,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x0a,0x7c]
@@ -43061,9 +44709,15 @@ v_cmp_lg_f32_e64 tma, 0, s2
v_cmp_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x0a,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x0a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x0a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x0a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x0a,0xd0,0x01,0x05,0x00,0x00]
@@ -43112,11 +44766,14 @@ v_cmp_lg_f32_e64 s[10:11], 0, exec_hi
v_cmp_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x04,0x02,0x00]
@@ -43124,9 +44781,15 @@ v_cmp_lg_f32_e64 s[10:11], 0, v2
v_cmp_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x0a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x0c,0x7c]
@@ -43220,9 +44883,15 @@ v_cmp_ge_f32_e64 tma, 0, s2
v_cmp_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x0c,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x0c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x0c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x0c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x0c,0xd0,0x01,0x05,0x00,0x00]
@@ -43271,11 +44940,14 @@ v_cmp_ge_f32_e64 s[10:11], 0, exec_hi
v_cmp_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x04,0x02,0x00]
@@ -43283,9 +44955,15 @@ v_cmp_ge_f32_e64 s[10:11], 0, v2
v_cmp_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x0c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_o_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x0e,0x7c]
@@ -43379,9 +45057,15 @@ v_cmp_o_f32_e64 tma, 0, s2
v_cmp_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x0e,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x0e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x0e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x0e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x0e,0xd0,0x01,0x05,0x00,0x00]
@@ -43430,11 +45114,14 @@ v_cmp_o_f32_e64 s[10:11], 0, exec_hi
v_cmp_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0xee,0x01,0x00]
v_cmp_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x04,0x02,0x00]
@@ -43442,9 +45129,15 @@ v_cmp_o_f32_e64 s[10:11], 0, v2
v_cmp_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x0e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_u_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x10,0x7c]
@@ -43538,9 +45231,15 @@ v_cmp_u_f32_e64 tma, 0, s2
v_cmp_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x10,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x10,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x10,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x10,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x10,0xd0,0x01,0x05,0x00,0x00]
@@ -43589,11 +45288,14 @@ v_cmp_u_f32_e64 s[10:11], 0, exec_hi
v_cmp_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0xee,0x01,0x00]
v_cmp_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x04,0x02,0x00]
@@ -43601,9 +45303,15 @@ v_cmp_u_f32_e64 s[10:11], 0, v2
v_cmp_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x10,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x12,0x7c]
@@ -43697,9 +45405,15 @@ v_cmp_nge_f32_e64 tma, 0, s2
v_cmp_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x12,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x12,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x12,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x12,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x12,0xd0,0x01,0x05,0x00,0x00]
@@ -43748,11 +45462,14 @@ v_cmp_nge_f32_e64 s[10:11], 0, exec_hi
v_cmp_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x04,0x02,0x00]
@@ -43760,9 +45477,15 @@ v_cmp_nge_f32_e64 s[10:11], 0, v2
v_cmp_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x12,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x14,0x7c]
@@ -43856,9 +45579,15 @@ v_cmp_nlg_f32_e64 tma, 0, s2
v_cmp_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x14,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x14,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x14,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x14,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x14,0xd0,0x01,0x05,0x00,0x00]
@@ -43907,11 +45636,14 @@ v_cmp_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmp_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x04,0x02,0x00]
@@ -43919,9 +45651,15 @@ v_cmp_nlg_f32_e64 s[10:11], 0, v2
v_cmp_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x14,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ngt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x16,0x7c]
@@ -44015,9 +45753,15 @@ v_cmp_ngt_f32_e64 tma, 0, s2
v_cmp_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x16,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x16,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x16,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x16,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x16,0xd0,0x01,0x05,0x00,0x00]
@@ -44066,11 +45810,14 @@ v_cmp_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmp_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x04,0x02,0x00]
@@ -44078,9 +45825,15 @@ v_cmp_ngt_f32_e64 s[10:11], 0, v2
v_cmp_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x16,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nle_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x18,0x7c]
@@ -44174,9 +45927,15 @@ v_cmp_nle_f32_e64 tma, 0, s2
v_cmp_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x18,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x18,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x18,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x18,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x18,0xd0,0x01,0x05,0x00,0x00]
@@ -44225,11 +45984,14 @@ v_cmp_nle_f32_e64 s[10:11], 0, exec_hi
v_cmp_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x04,0x02,0x00]
@@ -44237,9 +45999,15 @@ v_cmp_nle_f32_e64 s[10:11], 0, v2
v_cmp_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x18,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_neq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x1a,0x7c]
@@ -44333,9 +46101,15 @@ v_cmp_neq_f32_e64 tma, 0, s2
v_cmp_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x1a,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x1a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x1a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x1a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x1a,0xd0,0x01,0x05,0x00,0x00]
@@ -44384,11 +46158,14 @@ v_cmp_neq_f32_e64 s[10:11], 0, exec_hi
v_cmp_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0xee,0x01,0x00]
v_cmp_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x04,0x02,0x00]
@@ -44396,9 +46173,15 @@ v_cmp_neq_f32_e64 s[10:11], 0, v2
v_cmp_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x1a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x1c,0x7c]
@@ -44492,9 +46275,15 @@ v_cmp_nlt_f32_e64 tma, 0, s2
v_cmp_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x1c,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x1c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x1c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x1c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x1c,0xd0,0x01,0x05,0x00,0x00]
@@ -44543,11 +46332,14 @@ v_cmp_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmp_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x04,0x02,0x00]
@@ -44555,9 +46347,15 @@ v_cmp_nlt_f32_e64 s[10:11], 0, v2
v_cmp_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x1c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_tru_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x1e,0x7c]
@@ -44651,9 +46449,15 @@ v_cmp_tru_f32_e64 tma, 0, s2
v_cmp_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x1e,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x1e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x1e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x1e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x1e,0xd0,0x01,0x05,0x00,0x00]
@@ -44702,11 +46506,14 @@ v_cmp_tru_f32_e64 s[10:11], 0, exec_hi
v_cmp_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0xee,0x01,0x00]
v_cmp_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x04,0x02,0x00]
@@ -44714,9 +46521,15 @@ v_cmp_tru_f32_e64 s[10:11], 0, v2
v_cmp_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x1e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_f_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x20,0x7c]
@@ -44810,9 +46623,15 @@ v_cmpx_f_f32_e64 tma, 0, s2
v_cmpx_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x20,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x20,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x20,0xd0,0x01,0x05,0x00,0x00]
@@ -44861,11 +46680,14 @@ v_cmpx_f_f32_e64 s[10:11], 0, exec_hi
v_cmpx_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x02,0x00]
@@ -44873,9 +46695,15 @@ v_cmpx_f_f32_e64 s[10:11], 0, v2
v_cmpx_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x22,0x7c]
@@ -44969,9 +46797,15 @@ v_cmpx_lt_f32_e64 tma, 0, s2
v_cmpx_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x22,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x22,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x22,0xd0,0x01,0x05,0x00,0x00]
@@ -45020,11 +46854,14 @@ v_cmpx_lt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x02,0x00]
@@ -45032,9 +46869,15 @@ v_cmpx_lt_f32_e64 s[10:11], 0, v2
v_cmpx_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_eq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x24,0x7c]
@@ -45128,9 +46971,15 @@ v_cmpx_eq_f32_e64 tma, 0, s2
v_cmpx_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x24,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x24,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x24,0xd0,0x01,0x05,0x00,0x00]
@@ -45179,11 +47028,14 @@ v_cmpx_eq_f32_e64 s[10:11], 0, exec_hi
v_cmpx_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x02,0x00]
@@ -45191,9 +47043,15 @@ v_cmpx_eq_f32_e64 s[10:11], 0, v2
v_cmpx_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_le_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x26,0x7c]
@@ -45287,9 +47145,15 @@ v_cmpx_le_f32_e64 tma, 0, s2
v_cmpx_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x26,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x26,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x26,0xd0,0x01,0x05,0x00,0x00]
@@ -45338,11 +47202,14 @@ v_cmpx_le_f32_e64 s[10:11], 0, exec_hi
v_cmpx_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x02,0x00]
@@ -45350,9 +47217,15 @@ v_cmpx_le_f32_e64 s[10:11], 0, v2
v_cmpx_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_gt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x28,0x7c]
@@ -45446,9 +47319,15 @@ v_cmpx_gt_f32_e64 tma, 0, s2
v_cmpx_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x28,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x28,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x28,0xd0,0x01,0x05,0x00,0x00]
@@ -45497,11 +47376,14 @@ v_cmpx_gt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x02,0x00]
@@ -45509,9 +47391,15 @@ v_cmpx_gt_f32_e64 s[10:11], 0, v2
v_cmpx_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x2a,0x7c]
@@ -45605,9 +47493,15 @@ v_cmpx_lg_f32_e64 tma, 0, s2
v_cmpx_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x01,0x05,0x00,0x00]
@@ -45656,11 +47550,14 @@ v_cmpx_lg_f32_e64 s[10:11], 0, exec_hi
v_cmpx_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x02,0x00]
@@ -45668,9 +47565,15 @@ v_cmpx_lg_f32_e64 s[10:11], 0, v2
v_cmpx_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x2c,0x7c]
@@ -45764,9 +47667,15 @@ v_cmpx_ge_f32_e64 tma, 0, s2
v_cmpx_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x01,0x05,0x00,0x00]
@@ -45815,11 +47724,14 @@ v_cmpx_ge_f32_e64 s[10:11], 0, exec_hi
v_cmpx_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x02,0x00]
@@ -45827,9 +47739,15 @@ v_cmpx_ge_f32_e64 s[10:11], 0, v2
v_cmpx_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_o_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x2e,0x7c]
@@ -45923,9 +47841,15 @@ v_cmpx_o_f32_e64 tma, 0, s2
v_cmpx_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x01,0x05,0x00,0x00]
@@ -45974,11 +47898,14 @@ v_cmpx_o_f32_e64 s[10:11], 0, exec_hi
v_cmpx_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x02,0x00]
@@ -45986,9 +47913,15 @@ v_cmpx_o_f32_e64 s[10:11], 0, v2
v_cmpx_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_u_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x30,0x7c]
@@ -46082,9 +48015,15 @@ v_cmpx_u_f32_e64 tma, 0, s2
v_cmpx_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x30,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x30,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x30,0xd0,0x01,0x05,0x00,0x00]
@@ -46133,11 +48072,14 @@ v_cmpx_u_f32_e64 s[10:11], 0, exec_hi
v_cmpx_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x02,0x00]
@@ -46145,9 +48087,15 @@ v_cmpx_u_f32_e64 s[10:11], 0, v2
v_cmpx_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x32,0x7c]
@@ -46241,9 +48189,15 @@ v_cmpx_nge_f32_e64 tma, 0, s2
v_cmpx_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x32,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x32,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x32,0xd0,0x01,0x05,0x00,0x00]
@@ -46292,11 +48246,14 @@ v_cmpx_nge_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x02,0x00]
@@ -46304,9 +48261,15 @@ v_cmpx_nge_f32_e64 s[10:11], 0, v2
v_cmpx_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x34,0x7c]
@@ -46400,9 +48363,15 @@ v_cmpx_nlg_f32_e64 tma, 0, s2
v_cmpx_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x34,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x34,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x34,0xd0,0x01,0x05,0x00,0x00]
@@ -46451,11 +48420,14 @@ v_cmpx_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x02,0x00]
@@ -46463,9 +48435,15 @@ v_cmpx_nlg_f32_e64 s[10:11], 0, v2
v_cmpx_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ngt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x36,0x7c]
@@ -46559,9 +48537,15 @@ v_cmpx_ngt_f32_e64 tma, 0, s2
v_cmpx_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x36,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x36,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x36,0xd0,0x01,0x05,0x00,0x00]
@@ -46610,11 +48594,14 @@ v_cmpx_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x02,0x00]
@@ -46622,9 +48609,15 @@ v_cmpx_ngt_f32_e64 s[10:11], 0, v2
v_cmpx_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nle_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x38,0x7c]
@@ -46718,9 +48711,15 @@ v_cmpx_nle_f32_e64 tma, 0, s2
v_cmpx_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x38,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x38,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x38,0xd0,0x01,0x05,0x00,0x00]
@@ -46769,11 +48768,14 @@ v_cmpx_nle_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x02,0x00]
@@ -46781,9 +48783,15 @@ v_cmpx_nle_f32_e64 s[10:11], 0, v2
v_cmpx_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_neq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x3a,0x7c]
@@ -46877,9 +48885,15 @@ v_cmpx_neq_f32_e64 tma, 0, s2
v_cmpx_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x01,0x05,0x00,0x00]
@@ -46928,11 +48942,14 @@ v_cmpx_neq_f32_e64 s[10:11], 0, exec_hi
v_cmpx_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x02,0x00]
@@ -46940,9 +48957,15 @@ v_cmpx_neq_f32_e64 s[10:11], 0, v2
v_cmpx_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x3c,0x7c]
@@ -47036,9 +49059,15 @@ v_cmpx_nlt_f32_e64 tma, 0, s2
v_cmpx_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x01,0x05,0x00,0x00]
@@ -47087,11 +49116,14 @@ v_cmpx_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x02,0x00]
@@ -47099,9 +49131,15 @@ v_cmpx_nlt_f32_e64 s[10:11], 0, v2
v_cmpx_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_tru_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x3e,0x7c]
@@ -47195,9 +49233,15 @@ v_cmpx_tru_f32_e64 tma, 0, s2
v_cmpx_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x01,0x05,0x00,0x00]
@@ -47246,11 +49290,14 @@ v_cmpx_tru_f32_e64 s[10:11], 0, exec_hi
v_cmpx_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x02,0x00]
@@ -47258,9 +49305,15 @@ v_cmpx_tru_f32_e64 s[10:11], 0, v2
v_cmpx_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_f_f64 vcc, s[2:3], v[2:3]
// CHECK: [0x02,0x04,0x40,0x7c]
@@ -47342,9 +49395,15 @@ v_cmp_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x40,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x40,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x40,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x40,0xd0,0x01,0x09,0x00,0x00]
@@ -47354,9 +49413,15 @@ v_cmp_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x40,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x40,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x40,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x40,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x40,0xd0,0x04,0x04,0x02,0x00]
@@ -47453,9 +49518,15 @@ v_cmp_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x42,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x42,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x42,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x42,0xd0,0x01,0x09,0x00,0x00]
@@ -47465,9 +49536,15 @@ v_cmp_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x42,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x42,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x42,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x42,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x42,0xd0,0x04,0x04,0x02,0x00]
@@ -47564,9 +49641,15 @@ v_cmp_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x44,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x44,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x44,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x44,0xd0,0x01,0x09,0x00,0x00]
@@ -47576,9 +49659,15 @@ v_cmp_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x44,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x44,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x44,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x44,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x44,0xd0,0x04,0x04,0x02,0x00]
@@ -47675,9 +49764,15 @@ v_cmp_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x46,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x46,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x46,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x46,0xd0,0x01,0x09,0x00,0x00]
@@ -47687,9 +49782,15 @@ v_cmp_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x46,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x46,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x46,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x46,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x46,0xd0,0x04,0x04,0x02,0x00]
@@ -47786,9 +49887,15 @@ v_cmp_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x48,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x48,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x48,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x48,0xd0,0x01,0x09,0x00,0x00]
@@ -47798,9 +49905,15 @@ v_cmp_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x48,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x48,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x48,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x48,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x48,0xd0,0x04,0x04,0x02,0x00]
@@ -47897,9 +50010,15 @@ v_cmp_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x4a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x4a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x4a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x4a,0xd0,0x01,0x09,0x00,0x00]
@@ -47909,9 +50028,15 @@ v_cmp_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x4a,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x4a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x4a,0xd0,0x04,0x04,0x02,0x00]
@@ -48008,9 +50133,15 @@ v_cmp_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x4c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x4c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x4c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x4c,0xd0,0x01,0x09,0x00,0x00]
@@ -48020,9 +50151,15 @@ v_cmp_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x4c,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x4c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x4c,0xd0,0x04,0x04,0x02,0x00]
@@ -48119,9 +50256,15 @@ v_cmp_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x4e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x4e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x4e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x4e,0xd0,0x01,0x09,0x00,0x00]
@@ -48131,9 +50274,15 @@ v_cmp_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x4e,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x4e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x4e,0xd0,0x04,0x04,0x02,0x00]
@@ -48230,9 +50379,15 @@ v_cmp_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x50,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x50,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x50,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x50,0xd0,0x01,0x09,0x00,0x00]
@@ -48242,9 +50397,15 @@ v_cmp_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x50,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x50,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x50,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x50,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x50,0xd0,0x04,0x04,0x02,0x00]
@@ -48341,9 +50502,15 @@ v_cmp_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x52,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x52,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x52,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x52,0xd0,0x01,0x09,0x00,0x00]
@@ -48353,9 +50520,15 @@ v_cmp_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x52,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x52,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x52,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x52,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x52,0xd0,0x04,0x04,0x02,0x00]
@@ -48452,9 +50625,15 @@ v_cmp_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x54,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x54,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x54,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x54,0xd0,0x01,0x09,0x00,0x00]
@@ -48464,9 +50643,15 @@ v_cmp_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x54,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x54,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x54,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x54,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x54,0xd0,0x04,0x04,0x02,0x00]
@@ -48563,9 +50748,15 @@ v_cmp_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x56,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x56,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x56,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x56,0xd0,0x01,0x09,0x00,0x00]
@@ -48575,9 +50766,15 @@ v_cmp_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x56,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x56,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x56,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x56,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x56,0xd0,0x04,0x04,0x02,0x00]
@@ -48674,9 +50871,15 @@ v_cmp_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x58,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x58,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x58,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x58,0xd0,0x01,0x09,0x00,0x00]
@@ -48686,9 +50889,15 @@ v_cmp_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x58,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x58,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x58,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x58,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x58,0xd0,0x04,0x04,0x02,0x00]
@@ -48785,9 +50994,15 @@ v_cmp_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x5a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x5a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x5a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x5a,0xd0,0x01,0x09,0x00,0x00]
@@ -48797,9 +51012,15 @@ v_cmp_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x5a,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x5a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x5a,0xd0,0x04,0x04,0x02,0x00]
@@ -48896,9 +51117,15 @@ v_cmp_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x5c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x5c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x5c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x5c,0xd0,0x01,0x09,0x00,0x00]
@@ -48908,9 +51135,15 @@ v_cmp_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x5c,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x5c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x5c,0xd0,0x04,0x04,0x02,0x00]
@@ -49007,9 +51240,15 @@ v_cmp_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x5e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x5e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x5e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x5e,0xd0,0x01,0x09,0x00,0x00]
@@ -49019,9 +51258,15 @@ v_cmp_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x5e,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x5e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x5e,0xd0,0x04,0x04,0x02,0x00]
@@ -49118,9 +51363,15 @@ v_cmpx_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x60,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x60,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0x01,0x09,0x00,0x00]
@@ -49130,9 +51381,15 @@ v_cmpx_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x04,0x02,0x00]
@@ -49229,9 +51486,15 @@ v_cmpx_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x62,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x62,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0x01,0x09,0x00,0x00]
@@ -49241,9 +51504,15 @@ v_cmpx_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x04,0x02,0x00]
@@ -49340,9 +51609,15 @@ v_cmpx_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x64,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x64,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0x01,0x09,0x00,0x00]
@@ -49352,9 +51627,15 @@ v_cmpx_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x04,0x02,0x00]
@@ -49451,9 +51732,15 @@ v_cmpx_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x66,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x66,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0x01,0x09,0x00,0x00]
@@ -49463,9 +51750,15 @@ v_cmpx_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x04,0x02,0x00]
@@ -49562,9 +51855,15 @@ v_cmpx_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x68,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x68,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0x01,0x09,0x00,0x00]
@@ -49574,9 +51873,15 @@ v_cmpx_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x04,0x02,0x00]
@@ -49673,9 +51978,15 @@ v_cmpx_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x01,0x09,0x00,0x00]
@@ -49685,9 +51996,15 @@ v_cmpx_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x04,0x02,0x00]
@@ -49784,9 +52101,15 @@ v_cmpx_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x01,0x09,0x00,0x00]
@@ -49796,9 +52119,15 @@ v_cmpx_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x04,0x02,0x00]
@@ -49895,9 +52224,15 @@ v_cmpx_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x01,0x09,0x00,0x00]
@@ -49907,9 +52242,15 @@ v_cmpx_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x04,0x02,0x00]
@@ -50006,9 +52347,15 @@ v_cmpx_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x70,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x70,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0x01,0x09,0x00,0x00]
@@ -50018,9 +52365,15 @@ v_cmpx_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x04,0x02,0x00]
@@ -50117,9 +52470,15 @@ v_cmpx_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x72,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x72,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0x01,0x09,0x00,0x00]
@@ -50129,9 +52488,15 @@ v_cmpx_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x04,0x02,0x00]
@@ -50228,9 +52593,15 @@ v_cmpx_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x74,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x74,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0x01,0x09,0x00,0x00]
@@ -50240,9 +52611,15 @@ v_cmpx_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x04,0x02,0x00]
@@ -50339,9 +52716,15 @@ v_cmpx_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x76,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x76,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0x01,0x09,0x00,0x00]
@@ -50351,9 +52734,15 @@ v_cmpx_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x04,0x02,0x00]
@@ -50450,9 +52839,15 @@ v_cmpx_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x78,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x78,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0x01,0x09,0x00,0x00]
@@ -50462,9 +52857,15 @@ v_cmpx_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x04,0x02,0x00]
@@ -50561,9 +52962,15 @@ v_cmpx_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x01,0x09,0x00,0x00]
@@ -50573,9 +52980,15 @@ v_cmpx_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x04,0x02,0x00]
@@ -50672,9 +53085,15 @@ v_cmpx_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x01,0x09,0x00,0x00]
@@ -50684,9 +53103,15 @@ v_cmpx_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x04,0x02,0x00]
@@ -50783,9 +53208,15 @@ v_cmpx_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x01,0x09,0x00,0x00]
@@ -50795,9 +53226,15 @@ v_cmpx_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x04,0x02,0x00]
@@ -50906,9 +53343,15 @@ v_cmps_f_f32_e64 tma, 0, s2
v_cmps_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x80,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x80,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x80,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x80,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x80,0xd0,0x01,0x05,0x00,0x00]
@@ -50957,11 +53400,14 @@ v_cmps_f_f32_e64 s[10:11], 0, exec_hi
v_cmps_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0xee,0x01,0x00]
v_cmps_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x04,0x02,0x00]
@@ -50969,9 +53415,15 @@ v_cmps_f_f32_e64 s[10:11], 0, v2
v_cmps_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x80,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_lt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x82,0x7c]
@@ -51065,9 +53517,15 @@ v_cmps_lt_f32_e64 tma, 0, s2
v_cmps_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x82,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x82,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x82,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x82,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x82,0xd0,0x01,0x05,0x00,0x00]
@@ -51116,11 +53574,14 @@ v_cmps_lt_f32_e64 s[10:11], 0, exec_hi
v_cmps_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0xee,0x01,0x00]
v_cmps_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x04,0x02,0x00]
@@ -51128,9 +53589,15 @@ v_cmps_lt_f32_e64 s[10:11], 0, v2
v_cmps_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x82,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_eq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x84,0x7c]
@@ -51224,9 +53691,15 @@ v_cmps_eq_f32_e64 tma, 0, s2
v_cmps_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x84,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x84,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x84,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x84,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x84,0xd0,0x01,0x05,0x00,0x00]
@@ -51275,11 +53748,14 @@ v_cmps_eq_f32_e64 s[10:11], 0, exec_hi
v_cmps_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0xee,0x01,0x00]
v_cmps_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x04,0x02,0x00]
@@ -51287,9 +53763,15 @@ v_cmps_eq_f32_e64 s[10:11], 0, v2
v_cmps_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x84,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_le_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x86,0x7c]
@@ -51383,9 +53865,15 @@ v_cmps_le_f32_e64 tma, 0, s2
v_cmps_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x86,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x86,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x86,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x86,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x86,0xd0,0x01,0x05,0x00,0x00]
@@ -51434,11 +53922,14 @@ v_cmps_le_f32_e64 s[10:11], 0, exec_hi
v_cmps_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0xee,0x01,0x00]
v_cmps_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x04,0x02,0x00]
@@ -51446,9 +53937,15 @@ v_cmps_le_f32_e64 s[10:11], 0, v2
v_cmps_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x86,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_gt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x88,0x7c]
@@ -51542,9 +54039,15 @@ v_cmps_gt_f32_e64 tma, 0, s2
v_cmps_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x88,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x88,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x88,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x88,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x88,0xd0,0x01,0x05,0x00,0x00]
@@ -51593,11 +54096,14 @@ v_cmps_gt_f32_e64 s[10:11], 0, exec_hi
v_cmps_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0xee,0x01,0x00]
v_cmps_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x04,0x02,0x00]
@@ -51605,9 +54111,15 @@ v_cmps_gt_f32_e64 s[10:11], 0, v2
v_cmps_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x88,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_lg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x8a,0x7c]
@@ -51701,9 +54213,15 @@ v_cmps_lg_f32_e64 tma, 0, s2
v_cmps_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x8a,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x8a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x8a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x8a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x8a,0xd0,0x01,0x05,0x00,0x00]
@@ -51752,11 +54270,14 @@ v_cmps_lg_f32_e64 s[10:11], 0, exec_hi
v_cmps_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0xee,0x01,0x00]
v_cmps_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x04,0x02,0x00]
@@ -51764,9 +54285,15 @@ v_cmps_lg_f32_e64 s[10:11], 0, v2
v_cmps_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x8a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_ge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x8c,0x7c]
@@ -51860,9 +54387,15 @@ v_cmps_ge_f32_e64 tma, 0, s2
v_cmps_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x8c,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x8c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x8c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x8c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x8c,0xd0,0x01,0x05,0x00,0x00]
@@ -51911,11 +54444,14 @@ v_cmps_ge_f32_e64 s[10:11], 0, exec_hi
v_cmps_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0xee,0x01,0x00]
v_cmps_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x04,0x02,0x00]
@@ -51923,9 +54459,15 @@ v_cmps_ge_f32_e64 s[10:11], 0, v2
v_cmps_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x8c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_o_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x8e,0x7c]
@@ -52019,9 +54561,15 @@ v_cmps_o_f32_e64 tma, 0, s2
v_cmps_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x8e,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x8e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x8e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x8e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x8e,0xd0,0x01,0x05,0x00,0x00]
@@ -52070,11 +54618,14 @@ v_cmps_o_f32_e64 s[10:11], 0, exec_hi
v_cmps_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0xee,0x01,0x00]
v_cmps_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x04,0x02,0x00]
@@ -52082,9 +54633,15 @@ v_cmps_o_f32_e64 s[10:11], 0, v2
v_cmps_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x8e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_u_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x90,0x7c]
@@ -52178,9 +54735,15 @@ v_cmps_u_f32_e64 tma, 0, s2
v_cmps_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x90,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x90,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x90,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x90,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x90,0xd0,0x01,0x05,0x00,0x00]
@@ -52229,11 +54792,14 @@ v_cmps_u_f32_e64 s[10:11], 0, exec_hi
v_cmps_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0xee,0x01,0x00]
v_cmps_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x04,0x02,0x00]
@@ -52241,9 +54807,15 @@ v_cmps_u_f32_e64 s[10:11], 0, v2
v_cmps_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x90,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_nge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x92,0x7c]
@@ -52337,9 +54909,15 @@ v_cmps_nge_f32_e64 tma, 0, s2
v_cmps_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x92,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x92,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x92,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x92,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x92,0xd0,0x01,0x05,0x00,0x00]
@@ -52388,11 +54966,14 @@ v_cmps_nge_f32_e64 s[10:11], 0, exec_hi
v_cmps_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0xee,0x01,0x00]
v_cmps_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x04,0x02,0x00]
@@ -52400,9 +54981,15 @@ v_cmps_nge_f32_e64 s[10:11], 0, v2
v_cmps_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x92,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_nlg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x94,0x7c]
@@ -52496,9 +55083,15 @@ v_cmps_nlg_f32_e64 tma, 0, s2
v_cmps_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x94,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x94,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x94,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x94,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x94,0xd0,0x01,0x05,0x00,0x00]
@@ -52547,11 +55140,14 @@ v_cmps_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmps_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0xee,0x01,0x00]
v_cmps_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x04,0x02,0x00]
@@ -52559,9 +55155,15 @@ v_cmps_nlg_f32_e64 s[10:11], 0, v2
v_cmps_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x94,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_ngt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x96,0x7c]
@@ -52655,9 +55257,15 @@ v_cmps_ngt_f32_e64 tma, 0, s2
v_cmps_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x96,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x96,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x96,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x96,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x96,0xd0,0x01,0x05,0x00,0x00]
@@ -52706,11 +55314,14 @@ v_cmps_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmps_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0xee,0x01,0x00]
v_cmps_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x04,0x02,0x00]
@@ -52718,9 +55329,15 @@ v_cmps_ngt_f32_e64 s[10:11], 0, v2
v_cmps_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x96,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_nle_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x98,0x7c]
@@ -52814,9 +55431,15 @@ v_cmps_nle_f32_e64 tma, 0, s2
v_cmps_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x98,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x98,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x98,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x98,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x98,0xd0,0x01,0x05,0x00,0x00]
@@ -52865,11 +55488,14 @@ v_cmps_nle_f32_e64 s[10:11], 0, exec_hi
v_cmps_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0xee,0x01,0x00]
v_cmps_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x04,0x02,0x00]
@@ -52877,9 +55503,15 @@ v_cmps_nle_f32_e64 s[10:11], 0, v2
v_cmps_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x98,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_neq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x9a,0x7c]
@@ -52973,9 +55605,15 @@ v_cmps_neq_f32_e64 tma, 0, s2
v_cmps_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x9a,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x9a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x9a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x9a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x9a,0xd0,0x01,0x05,0x00,0x00]
@@ -53024,11 +55662,14 @@ v_cmps_neq_f32_e64 s[10:11], 0, exec_hi
v_cmps_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0xee,0x01,0x00]
v_cmps_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x04,0x02,0x00]
@@ -53036,9 +55677,15 @@ v_cmps_neq_f32_e64 s[10:11], 0, v2
v_cmps_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x9a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_nlt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x9c,0x7c]
@@ -53132,9 +55779,15 @@ v_cmps_nlt_f32_e64 tma, 0, s2
v_cmps_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x9c,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x9c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x9c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x9c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x9c,0xd0,0x01,0x05,0x00,0x00]
@@ -53183,11 +55836,14 @@ v_cmps_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmps_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0xee,0x01,0x00]
v_cmps_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x04,0x02,0x00]
@@ -53195,9 +55851,15 @@ v_cmps_nlt_f32_e64 s[10:11], 0, v2
v_cmps_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x9c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_tru_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0x9e,0x7c]
@@ -53291,9 +55953,15 @@ v_cmps_tru_f32_e64 tma, 0, s2
v_cmps_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x9e,0xd0,0x80,0x04,0x00,0x00]
+v_cmps_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x9e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmps_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x9e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmps_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x9e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmps_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x9e,0xd0,0x01,0x05,0x00,0x00]
@@ -53342,11 +56010,14 @@ v_cmps_tru_f32_e64 s[10:11], 0, exec_hi
v_cmps_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x00,0x01,0x00]
+v_cmps_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmps_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmps_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmps_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0xee,0x01,0x00]
v_cmps_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x04,0x02,0x00]
@@ -53354,9 +56025,15 @@ v_cmps_tru_f32_e64 s[10:11], 0, v2
v_cmps_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmps_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmps_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x04,0x00,0x40]
+v_cmps_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x9e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_f_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xa0,0x7c]
@@ -53450,9 +56127,15 @@ v_cmpsx_f_f32_e64 tma, 0, s2
v_cmpsx_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xa0,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xa0,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xa0,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xa0,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xa0,0xd0,0x01,0x05,0x00,0x00]
@@ -53501,11 +56184,14 @@ v_cmpsx_f_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x04,0x02,0x00]
@@ -53513,9 +56199,15 @@ v_cmpsx_f_f32_e64 s[10:11], 0, v2
v_cmpsx_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xa0,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_lt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xa2,0x7c]
@@ -53609,9 +56301,15 @@ v_cmpsx_lt_f32_e64 tma, 0, s2
v_cmpsx_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xa2,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xa2,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xa2,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xa2,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xa2,0xd0,0x01,0x05,0x00,0x00]
@@ -53660,11 +56358,14 @@ v_cmpsx_lt_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x04,0x02,0x00]
@@ -53672,9 +56373,15 @@ v_cmpsx_lt_f32_e64 s[10:11], 0, v2
v_cmpsx_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xa2,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_eq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xa4,0x7c]
@@ -53768,9 +56475,15 @@ v_cmpsx_eq_f32_e64 tma, 0, s2
v_cmpsx_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xa4,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xa4,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xa4,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xa4,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xa4,0xd0,0x01,0x05,0x00,0x00]
@@ -53819,11 +56532,14 @@ v_cmpsx_eq_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x04,0x02,0x00]
@@ -53831,9 +56547,15 @@ v_cmpsx_eq_f32_e64 s[10:11], 0, v2
v_cmpsx_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xa4,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_le_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xa6,0x7c]
@@ -53927,9 +56649,15 @@ v_cmpsx_le_f32_e64 tma, 0, s2
v_cmpsx_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xa6,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xa6,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xa6,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xa6,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xa6,0xd0,0x01,0x05,0x00,0x00]
@@ -53978,11 +56706,14 @@ v_cmpsx_le_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x04,0x02,0x00]
@@ -53990,9 +56721,15 @@ v_cmpsx_le_f32_e64 s[10:11], 0, v2
v_cmpsx_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xa6,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_gt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xa8,0x7c]
@@ -54086,9 +56823,15 @@ v_cmpsx_gt_f32_e64 tma, 0, s2
v_cmpsx_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xa8,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xa8,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xa8,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xa8,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xa8,0xd0,0x01,0x05,0x00,0x00]
@@ -54137,11 +56880,14 @@ v_cmpsx_gt_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x04,0x02,0x00]
@@ -54149,9 +56895,15 @@ v_cmpsx_gt_f32_e64 s[10:11], 0, v2
v_cmpsx_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xa8,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_lg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xaa,0x7c]
@@ -54245,9 +56997,15 @@ v_cmpsx_lg_f32_e64 tma, 0, s2
v_cmpsx_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xaa,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xaa,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xaa,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xaa,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xaa,0xd0,0x01,0x05,0x00,0x00]
@@ -54296,11 +57054,14 @@ v_cmpsx_lg_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x04,0x02,0x00]
@@ -54308,9 +57069,15 @@ v_cmpsx_lg_f32_e64 s[10:11], 0, v2
v_cmpsx_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xaa,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_ge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xac,0x7c]
@@ -54404,9 +57171,15 @@ v_cmpsx_ge_f32_e64 tma, 0, s2
v_cmpsx_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xac,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xac,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xac,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xac,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xac,0xd0,0x01,0x05,0x00,0x00]
@@ -54455,11 +57228,14 @@ v_cmpsx_ge_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x04,0x02,0x00]
@@ -54467,9 +57243,15 @@ v_cmpsx_ge_f32_e64 s[10:11], 0, v2
v_cmpsx_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xac,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_o_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xae,0x7c]
@@ -54563,9 +57345,15 @@ v_cmpsx_o_f32_e64 tma, 0, s2
v_cmpsx_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xae,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xae,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xae,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xae,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xae,0xd0,0x01,0x05,0x00,0x00]
@@ -54614,11 +57402,14 @@ v_cmpsx_o_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x04,0x02,0x00]
@@ -54626,9 +57417,15 @@ v_cmpsx_o_f32_e64 s[10:11], 0, v2
v_cmpsx_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xae,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_u_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xb0,0x7c]
@@ -54722,9 +57519,15 @@ v_cmpsx_u_f32_e64 tma, 0, s2
v_cmpsx_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xb0,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xb0,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xb0,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xb0,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xb0,0xd0,0x01,0x05,0x00,0x00]
@@ -54773,11 +57576,14 @@ v_cmpsx_u_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x04,0x02,0x00]
@@ -54785,9 +57591,15 @@ v_cmpsx_u_f32_e64 s[10:11], 0, v2
v_cmpsx_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xb0,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_nge_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xb2,0x7c]
@@ -54881,9 +57693,15 @@ v_cmpsx_nge_f32_e64 tma, 0, s2
v_cmpsx_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xb2,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xb2,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xb2,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xb2,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xb2,0xd0,0x01,0x05,0x00,0x00]
@@ -54932,11 +57750,14 @@ v_cmpsx_nge_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x04,0x02,0x00]
@@ -54944,9 +57765,15 @@ v_cmpsx_nge_f32_e64 s[10:11], 0, v2
v_cmpsx_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xb2,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_nlg_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xb4,0x7c]
@@ -55040,9 +57867,15 @@ v_cmpsx_nlg_f32_e64 tma, 0, s2
v_cmpsx_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xb4,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xb4,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xb4,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xb4,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xb4,0xd0,0x01,0x05,0x00,0x00]
@@ -55091,11 +57924,14 @@ v_cmpsx_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x04,0x02,0x00]
@@ -55103,9 +57939,15 @@ v_cmpsx_nlg_f32_e64 s[10:11], 0, v2
v_cmpsx_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xb4,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_ngt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xb6,0x7c]
@@ -55199,9 +58041,15 @@ v_cmpsx_ngt_f32_e64 tma, 0, s2
v_cmpsx_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xb6,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xb6,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xb6,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xb6,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xb6,0xd0,0x01,0x05,0x00,0x00]
@@ -55250,11 +58098,14 @@ v_cmpsx_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x04,0x02,0x00]
@@ -55262,9 +58113,15 @@ v_cmpsx_ngt_f32_e64 s[10:11], 0, v2
v_cmpsx_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xb6,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_nle_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xb8,0x7c]
@@ -55358,9 +58215,15 @@ v_cmpsx_nle_f32_e64 tma, 0, s2
v_cmpsx_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xb8,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xb8,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xb8,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xb8,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xb8,0xd0,0x01,0x05,0x00,0x00]
@@ -55409,11 +58272,14 @@ v_cmpsx_nle_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x04,0x02,0x00]
@@ -55421,9 +58287,15 @@ v_cmpsx_nle_f32_e64 s[10:11], 0, v2
v_cmpsx_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xb8,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_neq_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xba,0x7c]
@@ -55517,9 +58389,15 @@ v_cmpsx_neq_f32_e64 tma, 0, s2
v_cmpsx_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xba,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xba,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xba,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xba,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xba,0xd0,0x01,0x05,0x00,0x00]
@@ -55568,11 +58446,14 @@ v_cmpsx_neq_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x04,0x02,0x00]
@@ -55580,9 +58461,15 @@ v_cmpsx_neq_f32_e64 s[10:11], 0, v2
v_cmpsx_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xba,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_nlt_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xbc,0x7c]
@@ -55676,9 +58563,15 @@ v_cmpsx_nlt_f32_e64 tma, 0, s2
v_cmpsx_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xbc,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xbc,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xbc,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xbc,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xbc,0xd0,0x01,0x05,0x00,0x00]
@@ -55727,11 +58620,14 @@ v_cmpsx_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x04,0x02,0x00]
@@ -55739,9 +58635,15 @@ v_cmpsx_nlt_f32_e64 s[10:11], 0, v2
v_cmpsx_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xbc,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpsx_tru_f32 vcc, s1, v2
// CHECK: [0x01,0x04,0xbe,0x7c]
@@ -55835,9 +58737,15 @@ v_cmpsx_tru_f32_e64 tma, 0, s2
v_cmpsx_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0xbe,0xd0,0x80,0x04,0x00,0x00]
+v_cmpsx_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0xbe,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpsx_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0xbe,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpsx_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0xbe,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpsx_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0xbe,0xd0,0x01,0x05,0x00,0x00]
@@ -55886,11 +58794,14 @@ v_cmpsx_tru_f32_e64 s[10:11], 0, exec_hi
v_cmpsx_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x00,0x01,0x00]
+v_cmpsx_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpsx_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpsx_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpsx_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0xee,0x01,0x00]
v_cmpsx_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x04,0x02,0x00]
@@ -55898,9 +58809,15 @@ v_cmpsx_tru_f32_e64 s[10:11], 0, v2
v_cmpsx_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpsx_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpsx_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x04,0x00,0x40]
+v_cmpsx_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0xbe,0xd0,0x80,0x04,0x00,0x60]
+
v_cmps_f_f64 vcc, s[2:3], v[2:3]
// CHECK: [0x02,0x04,0xc0,0x7c]
@@ -55982,9 +58899,15 @@ v_cmps_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xc0,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xc0,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xc0,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xc0,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xc0,0xd0,0x01,0x09,0x00,0x00]
@@ -55994,9 +58917,15 @@ v_cmps_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xc0,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xc0,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xc0,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xc0,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xc0,0xd0,0x04,0x04,0x02,0x00]
@@ -56093,9 +59022,15 @@ v_cmps_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xc2,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xc2,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xc2,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xc2,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xc2,0xd0,0x01,0x09,0x00,0x00]
@@ -56105,9 +59040,15 @@ v_cmps_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xc2,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xc2,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xc2,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xc2,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xc2,0xd0,0x04,0x04,0x02,0x00]
@@ -56204,9 +59145,15 @@ v_cmps_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xc4,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xc4,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xc4,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xc4,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xc4,0xd0,0x01,0x09,0x00,0x00]
@@ -56216,9 +59163,15 @@ v_cmps_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xc4,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xc4,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xc4,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xc4,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xc4,0xd0,0x04,0x04,0x02,0x00]
@@ -56315,9 +59268,15 @@ v_cmps_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xc6,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xc6,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xc6,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xc6,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xc6,0xd0,0x01,0x09,0x00,0x00]
@@ -56327,9 +59286,15 @@ v_cmps_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xc6,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xc6,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xc6,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xc6,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xc6,0xd0,0x04,0x04,0x02,0x00]
@@ -56426,9 +59391,15 @@ v_cmps_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xc8,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xc8,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xc8,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xc8,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xc8,0xd0,0x01,0x09,0x00,0x00]
@@ -56438,9 +59409,15 @@ v_cmps_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xc8,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xc8,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xc8,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xc8,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xc8,0xd0,0x04,0x04,0x02,0x00]
@@ -56537,9 +59514,15 @@ v_cmps_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xca,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xca,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xca,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xca,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xca,0xd0,0x01,0x09,0x00,0x00]
@@ -56549,9 +59532,15 @@ v_cmps_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xca,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xca,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xca,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xca,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xca,0xd0,0x04,0x04,0x02,0x00]
@@ -56648,9 +59637,15 @@ v_cmps_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xcc,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xcc,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xcc,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xcc,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xcc,0xd0,0x01,0x09,0x00,0x00]
@@ -56660,9 +59655,15 @@ v_cmps_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xcc,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xcc,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xcc,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xcc,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xcc,0xd0,0x04,0x04,0x02,0x00]
@@ -56759,9 +59760,15 @@ v_cmps_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xce,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xce,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xce,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xce,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xce,0xd0,0x01,0x09,0x00,0x00]
@@ -56771,9 +59778,15 @@ v_cmps_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xce,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xce,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xce,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xce,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xce,0xd0,0x04,0x04,0x02,0x00]
@@ -56870,9 +59883,15 @@ v_cmps_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xd0,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xd0,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xd0,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xd0,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xd0,0xd0,0x01,0x09,0x00,0x00]
@@ -56882,9 +59901,15 @@ v_cmps_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xd0,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xd0,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xd0,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xd0,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xd0,0xd0,0x04,0x04,0x02,0x00]
@@ -56981,9 +60006,15 @@ v_cmps_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xd2,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xd2,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xd2,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xd2,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xd2,0xd0,0x01,0x09,0x00,0x00]
@@ -56993,9 +60024,15 @@ v_cmps_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xd2,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xd2,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xd2,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xd2,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xd2,0xd0,0x04,0x04,0x02,0x00]
@@ -57092,9 +60129,15 @@ v_cmps_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xd4,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xd4,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xd4,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xd4,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xd4,0xd0,0x01,0x09,0x00,0x00]
@@ -57104,9 +60147,15 @@ v_cmps_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xd4,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xd4,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xd4,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xd4,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xd4,0xd0,0x04,0x04,0x02,0x00]
@@ -57203,9 +60252,15 @@ v_cmps_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xd6,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xd6,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xd6,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xd6,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xd6,0xd0,0x01,0x09,0x00,0x00]
@@ -57215,9 +60270,15 @@ v_cmps_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xd6,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xd6,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xd6,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xd6,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xd6,0xd0,0x04,0x04,0x02,0x00]
@@ -57314,9 +60375,15 @@ v_cmps_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xd8,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xd8,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xd8,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xd8,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xd8,0xd0,0x01,0x09,0x00,0x00]
@@ -57326,9 +60393,15 @@ v_cmps_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xd8,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xd8,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xd8,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xd8,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xd8,0xd0,0x04,0x04,0x02,0x00]
@@ -57425,9 +60498,15 @@ v_cmps_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xda,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xda,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xda,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xda,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xda,0xd0,0x01,0x09,0x00,0x00]
@@ -57437,9 +60516,15 @@ v_cmps_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xda,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xda,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xda,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xda,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xda,0xd0,0x04,0x04,0x02,0x00]
@@ -57536,9 +60621,15 @@ v_cmps_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xdc,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xdc,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xdc,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xdc,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xdc,0xd0,0x01,0x09,0x00,0x00]
@@ -57548,9 +60639,15 @@ v_cmps_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xdc,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xdc,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xdc,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xdc,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xdc,0xd0,0x04,0x04,0x02,0x00]
@@ -57647,9 +60744,15 @@ v_cmps_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmps_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xde,0xd0,0x80,0x08,0x00,0x00]
+v_cmps_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xde,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmps_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xde,0xd0,0xf0,0x08,0x00,0x00]
+v_cmps_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xde,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmps_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xde,0xd0,0x01,0x09,0x00,0x00]
@@ -57659,9 +60762,15 @@ v_cmps_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmps_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xde,0xd0,0x04,0x00,0x01,0x00]
+v_cmps_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xde,0xd0,0x04,0x82,0x01,0x00]
+
v_cmps_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xde,0xd0,0x04,0xe0,0x01,0x00]
+v_cmps_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xde,0xd0,0x04,0xee,0x01,0x00]
+
v_cmps_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xde,0xd0,0x04,0x04,0x02,0x00]
@@ -57758,9 +60867,15 @@ v_cmpsx_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xe0,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xe0,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xe0,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xe0,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xe0,0xd0,0x01,0x09,0x00,0x00]
@@ -57770,9 +60885,15 @@ v_cmpsx_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xe0,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xe0,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xe0,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xe0,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xe0,0xd0,0x04,0x04,0x02,0x00]
@@ -57869,9 +60990,15 @@ v_cmpsx_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xe2,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xe2,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xe2,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xe2,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xe2,0xd0,0x01,0x09,0x00,0x00]
@@ -57881,9 +61008,15 @@ v_cmpsx_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xe2,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xe2,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xe2,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xe2,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xe2,0xd0,0x04,0x04,0x02,0x00]
@@ -57980,9 +61113,15 @@ v_cmpsx_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xe4,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xe4,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xe4,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xe4,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xe4,0xd0,0x01,0x09,0x00,0x00]
@@ -57992,9 +61131,15 @@ v_cmpsx_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xe4,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xe4,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xe4,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xe4,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xe4,0xd0,0x04,0x04,0x02,0x00]
@@ -58091,9 +61236,15 @@ v_cmpsx_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xe6,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xe6,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xe6,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xe6,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xe6,0xd0,0x01,0x09,0x00,0x00]
@@ -58103,9 +61254,15 @@ v_cmpsx_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xe6,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xe6,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xe6,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xe6,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xe6,0xd0,0x04,0x04,0x02,0x00]
@@ -58202,9 +61359,15 @@ v_cmpsx_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xe8,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xe8,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xe8,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xe8,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xe8,0xd0,0x01,0x09,0x00,0x00]
@@ -58214,9 +61377,15 @@ v_cmpsx_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xe8,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xe8,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xe8,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xe8,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xe8,0xd0,0x04,0x04,0x02,0x00]
@@ -58313,9 +61482,15 @@ v_cmpsx_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xea,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xea,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xea,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xea,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xea,0xd0,0x01,0x09,0x00,0x00]
@@ -58325,9 +61500,15 @@ v_cmpsx_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xea,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xea,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xea,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xea,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xea,0xd0,0x04,0x04,0x02,0x00]
@@ -58424,9 +61605,15 @@ v_cmpsx_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xec,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xec,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xec,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xec,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xec,0xd0,0x01,0x09,0x00,0x00]
@@ -58436,9 +61623,15 @@ v_cmpsx_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xec,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xec,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xec,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xec,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xec,0xd0,0x04,0x04,0x02,0x00]
@@ -58535,9 +61728,15 @@ v_cmpsx_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xee,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xee,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xee,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xee,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xee,0xd0,0x01,0x09,0x00,0x00]
@@ -58547,9 +61746,15 @@ v_cmpsx_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xee,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xee,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xee,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xee,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xee,0xd0,0x04,0x04,0x02,0x00]
@@ -58646,9 +61851,15 @@ v_cmpsx_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xf0,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xf0,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xf0,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xf0,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xf0,0xd0,0x01,0x09,0x00,0x00]
@@ -58658,9 +61869,15 @@ v_cmpsx_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xf0,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xf0,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xf0,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xf0,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xf0,0xd0,0x04,0x04,0x02,0x00]
@@ -58757,9 +61974,15 @@ v_cmpsx_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xf2,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xf2,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xf2,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xf2,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xf2,0xd0,0x01,0x09,0x00,0x00]
@@ -58769,9 +61992,15 @@ v_cmpsx_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xf2,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xf2,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xf2,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xf2,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xf2,0xd0,0x04,0x04,0x02,0x00]
@@ -58868,9 +62097,15 @@ v_cmpsx_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xf4,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xf4,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xf4,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xf4,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xf4,0xd0,0x01,0x09,0x00,0x00]
@@ -58880,9 +62115,15 @@ v_cmpsx_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xf4,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xf4,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xf4,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xf4,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xf4,0xd0,0x04,0x04,0x02,0x00]
@@ -58979,9 +62220,15 @@ v_cmpsx_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xf6,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xf6,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xf6,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xf6,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xf6,0xd0,0x01,0x09,0x00,0x00]
@@ -58991,9 +62238,15 @@ v_cmpsx_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xf6,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xf6,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xf6,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xf6,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xf6,0xd0,0x04,0x04,0x02,0x00]
@@ -59090,9 +62343,15 @@ v_cmpsx_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xf8,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xf8,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xf8,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xf8,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xf8,0xd0,0x01,0x09,0x00,0x00]
@@ -59102,9 +62361,15 @@ v_cmpsx_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xf8,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xf8,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xf8,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xf8,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xf8,0xd0,0x04,0x04,0x02,0x00]
@@ -59201,9 +62466,15 @@ v_cmpsx_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xfa,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xfa,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xfa,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xfa,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xfa,0xd0,0x01,0x09,0x00,0x00]
@@ -59213,9 +62484,15 @@ v_cmpsx_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xfa,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xfa,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xfa,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xfa,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xfa,0xd0,0x04,0x04,0x02,0x00]
@@ -59312,9 +62589,15 @@ v_cmpsx_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xfc,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xfc,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xfc,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xfc,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xfc,0xd0,0x01,0x09,0x00,0x00]
@@ -59324,9 +62607,15 @@ v_cmpsx_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xfc,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xfc,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xfc,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xfc,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xfc,0xd0,0x04,0x04,0x02,0x00]
@@ -59423,9 +62712,15 @@ v_cmpsx_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpsx_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0xfe,0xd0,0x80,0x08,0x00,0x00]
+v_cmpsx_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0xfe,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpsx_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0xfe,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpsx_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0xfe,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpsx_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0xfe,0xd0,0x01,0x09,0x00,0x00]
@@ -59435,9 +62730,15 @@ v_cmpsx_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpsx_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0xfe,0xd0,0x04,0x00,0x01,0x00]
+v_cmpsx_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0xfe,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpsx_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0xfe,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpsx_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0xfe,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpsx_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0xfe,0xd0,0x04,0x04,0x02,0x00]
diff --git a/test/MC/AMDGPU/gfx8_asm_all.s b/test/MC/AMDGPU/gfx8_asm_all.s
index 0a0d42c208f91..458427e988c97 100644
--- a/test/MC/AMDGPU/gfx8_asm_all.s
+++ b/test/MC/AMDGPU/gfx8_asm_all.s
@@ -1,7 +1,5 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
-// *** GENERATED BY TESTGEN, DO NOT EDIT! ***
-
ds_add_u32 v1, v2 offset:65535
// CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00]
@@ -455,6 +453,9 @@ ds_max_f32 v1, v2 offset:4
ds_max_f32 v1, v2 offset:65535 gds
// CHECK: [0xff,0xff,0x27,0xd8,0x01,0x02,0x00,0x00]
+ds_nop
+// CHECK: [0x00,0x00,0x28,0xd8,0x00,0x00,0x00,0x00]
+
ds_add_f32 v1, v2 offset:65535
// CHECK: [0xff,0xff,0x2a,0xd8,0x01,0x02,0x00,0x00]
@@ -2678,89 +2679,89 @@ ds_max_src2_f64 v1 offset:4
ds_max_src2_f64 v1 offset:65535 gds
// CHECK: [0xff,0xff,0xa7,0xd9,0x01,0x00,0x00,0x00]
-ds_and_src2_b32 v1
-// CHECK: [0x00,0x00,0x12,0xd9,0x01,0x00,0x00,0x00]
+ds_write_b96 v1, v[2:4] offset:65535
+// CHECK: [0xff,0xff,0xbc,0xd9,0x01,0x02,0x00,0x00]
-ds_and_src2_b32 v1 gds
-// CHECK: [0x00,0x00,0x13,0xd9,0x01,0x00,0x00,0x00]
+ds_write_b96 v255, v[2:4] offset:65535
+// CHECK: [0xff,0xff,0xbc,0xd9,0xff,0x02,0x00,0x00]
-ds_and_src2_b32 v255 offset:65535
-// CHECK: [0xff,0xff,0x12,0xd9,0xff,0x00,0x00,0x00]
+ds_write_b96 v1, v[253:255] offset:65535
+// CHECK: [0xff,0xff,0xbc,0xd9,0x01,0xfd,0x00,0x00]
-ds_append v5
-// CHECK: [0x00,0x00,0x7c,0xd9,0x00,0x00,0x00,0x05]
+ds_write_b96 v1, v[2:4]
+// CHECK: [0x00,0x00,0xbc,0xd9,0x01,0x02,0x00,0x00]
-ds_append v5 gds
-// CHECK: [0x00,0x00,0x7d,0xd9,0x00,0x00,0x00,0x05]
+ds_write_b96 v1, v[2:4] offset:0
+// CHECK: [0x00,0x00,0xbc,0xd9,0x01,0x02,0x00,0x00]
-ds_append v255 offset:65535
-// CHECK: [0xff,0xff,0x7c,0xd9,0x00,0x00,0x00,0xff]
+ds_write_b96 v1, v[2:4] offset:4
+// CHECK: [0x04,0x00,0xbc,0xd9,0x01,0x02,0x00,0x00]
-ds_consume v5
-// CHECK: [0x00,0x00,0x7a,0xd9,0x00,0x00,0x00,0x05]
+ds_write_b96 v1, v[2:4] offset:65535 gds
+// CHECK: [0xff,0xff,0xbd,0xd9,0x01,0x02,0x00,0x00]
-ds_consume v5 gds
-// CHECK: [0x00,0x00,0x7b,0xd9,0x00,0x00,0x00,0x05]
+ds_write_b128 v1, v[2:5] offset:65535
+// CHECK: [0xff,0xff,0xbe,0xd9,0x01,0x02,0x00,0x00]
-ds_consume v255 offset:65535
-// CHECK: [0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0xff]
+ds_write_b128 v255, v[2:5] offset:65535
+// CHECK: [0xff,0xff,0xbe,0xd9,0xff,0x02,0x00,0x00]
-ds_ordered_count v5, v1 gds
-// CHECK: [0x00,0x00,0x7f,0xd9,0x01,0x00,0x00,0x05]
+ds_write_b128 v1, v[252:255] offset:65535
+// CHECK: [0xff,0xff,0xbe,0xd9,0x01,0xfc,0x00,0x00]
-ds_ordered_count v5, v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x7f,0xd9,0xff,0x00,0x00,0x05]
+ds_write_b128 v1, v[2:5]
+// CHECK: [0x00,0x00,0xbe,0xd9,0x01,0x02,0x00,0x00]
-ds_ordered_count v5, v255 gds
-// CHECK: [0x00,0x00,0x7f,0xd9,0xff,0x00,0x00,0x05]
+ds_write_b128 v1, v[2:5] offset:0
+// CHECK: [0x00,0x00,0xbe,0xd9,0x01,0x02,0x00,0x00]
-ds_gws_barrier v1 gds
-// CHECK: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+ds_write_b128 v1, v[2:5] offset:4
+// CHECK: [0x04,0x00,0xbe,0xd9,0x01,0x02,0x00,0x00]
-ds_gws_barrier v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00]
+ds_write_b128 v1, v[2:5] offset:65535 gds
+// CHECK: [0xff,0xff,0xbf,0xd9,0x01,0x02,0x00,0x00]
-ds_gws_init v1 gds
-// CHECK: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+ds_read_b96 v[5:7], v1 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_init v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00]
+ds_read_b96 v[253:255], v1 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xd9,0x01,0x00,0x00,0xfd]
-ds_gws_sema_br v1 gds
-// CHECK: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+ds_read_b96 v[5:7], v255 offset:65535
+// CHECK: [0xff,0xff,0xfc,0xd9,0xff,0x00,0x00,0x05]
-ds_gws_sema_br v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00]
+ds_read_b96 v[5:7], v1
+// CHECK: [0x00,0x00,0xfc,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_sema_p offset:65535 gds
-// CHECK: [0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b96 v[5:7], v1 offset:0
+// CHECK: [0x00,0x00,0xfc,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_sema_p gds
-// CHECK: [0x00,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b96 v[5:7], v1 offset:4
+// CHECK: [0x04,0x00,0xfc,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_sema_release_all offset:65535 gds
-// CHECK: [0xff,0xff,0x31,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b96 v[5:7], v1 offset:65535 gds
+// CHECK: [0xff,0xff,0xfd,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_sema_release_all gds
-// CHECK: [0x00,0x00,0x31,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b128 v[5:8], v1 offset:65535
+// CHECK: [0xff,0xff,0xfe,0xd9,0x01,0x00,0x00,0x05]
-ds_gws_sema_v offset:65535 gds
-// CHECK: [0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b128 v[252:255], v1 offset:65535
+// CHECK: [0xff,0xff,0xfe,0xd9,0x01,0x00,0x00,0xfc]
-ds_gws_sema_v gds
-// CHECK: [0x00,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
+ds_read_b128 v[5:8], v255 offset:65535
+// CHECK: [0xff,0xff,0xfe,0xd9,0xff,0x00,0x00,0x05]
-ds_wrap_rtn_b32 v5, v255, v2, v3 gds
-// CHECK: [0x00,0x00,0x69,0xd8,0xff,0x02,0x03,0x05]
+ds_read_b128 v[5:8], v1
+// CHECK: [0x00,0x00,0xfe,0xd9,0x01,0x00,0x00,0x05]
-ds_wrap_rtn_b32 v5, v255, v2, v255 offset:65535
-// CHECK: [0xff,0xff,0x68,0xd8,0xff,0x02,0xff,0x05]
+ds_read_b128 v[5:8], v1 offset:0
+// CHECK: [0x00,0x00,0xfe,0xd9,0x01,0x00,0x00,0x05]
-ds_condxchg32_rtn_b64 v[5:6], v1, v[254:255] offset:65535 gds
-// CHECK: [0xff,0xff,0xfd,0xd8,0x01,0xfe,0x00,0x05]
+ds_read_b128 v[5:8], v1 offset:4
+// CHECK: [0x04,0x00,0xfe,0xd9,0x01,0x00,0x00,0x05]
-ds_condxchg32_rtn_b64 v[5:6], v1, v[254:255]
-// CHECK: [0x00,0x00,0xfc,0xd8,0x01,0xfe,0x00,0x05]
+ds_read_b128 v[5:8], v1 offset:65535 gds
+// CHECK: [0xff,0xff,0xff,0xd9,0x01,0x00,0x00,0x05]
exp mrt0, v0, v0, v0, v0
// CHECK: [0x0f,0x00,0x00,0xc4,0x00,0x00,0x00,0x00]
@@ -23561,8 +23562,17 @@ v_cvt_i32_f64_e64 v5, ttmp[10:11]
v_cvt_i32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x43,0xd1,0x7e,0x00,0x00,0x00]
-v_cvt_i32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x43,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_i32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x43,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x43,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x43,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_i32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x43,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_i32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00]
@@ -24089,8 +24099,17 @@ v_cvt_u32_f32_e64 v5, exec_lo
v_cvt_u32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x47,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_u32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x47,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_u32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x47,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x47,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x47,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_u32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x47,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_u32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x47,0xd1,0x01,0x01,0x00,0x00]
@@ -24221,8 +24240,17 @@ v_cvt_i32_f32_e64 v5, exec_lo
v_cvt_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x48,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x48,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x48,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x48,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x48,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x48,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x48,0xd1,0x01,0x01,0x00,0x00]
@@ -24353,8 +24381,17 @@ v_cvt_f16_f32_e64 v5, exec_lo
v_cvt_f16_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x4a,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4a,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_f16_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x4a,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4a,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x4a,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_f16_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4a,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_f16_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4a,0xd1,0x01,0x01,0x00,0x00]
@@ -24494,8 +24531,17 @@ v_cvt_f32_f16_e64 v5, exec_lo
v_cvt_f32_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x4b,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_f32_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x4b,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_f32_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x4b,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_f32_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x4b,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_f32_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x4b,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_f32_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4b,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_f32_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x4b,0xd1,0x01,0x01,0x00,0x00]
@@ -24635,8 +24681,17 @@ v_cvt_rpi_i32_f32_e64 v5, exec_lo
v_cvt_rpi_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x4c,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_rpi_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4c,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_rpi_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x4c,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4c,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x4c,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_rpi_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4c,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_rpi_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4c,0xd1,0x01,0x01,0x00,0x00]
@@ -24767,8 +24822,17 @@ v_cvt_flr_i32_f32_e64 v5, exec_lo
v_cvt_flr_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x4d,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_flr_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x4d,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_flr_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x4d,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x4d,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x4d,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_flr_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4d,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_flr_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x4d,0xd1,0x01,0x01,0x00,0x00]
@@ -25001,8 +25065,17 @@ v_cvt_f32_f64_e64 v5, ttmp[10:11]
v_cvt_f32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x4f,0xd1,0x7e,0x00,0x00,0x00]
-v_cvt_f32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x4f,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_f32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x4f,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x4f,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x4f,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_f32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x4f,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_f32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x4f,0xd1,0x01,0x01,0x00,0x00]
@@ -25142,8 +25215,17 @@ v_cvt_f64_f32_e64 v[5:6], exec_lo
v_cvt_f64_f32_e64 v[5:6], exec_hi
// CHECK: [0x05,0x00,0x50,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_f64_f32_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x50,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_f64_f32_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x50,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x50,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x50,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_f64_f32_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x50,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_f64_f32_e64 v[5:6], v1
// CHECK: [0x05,0x00,0x50,0xd1,0x01,0x01,0x00,0x00]
@@ -25781,8 +25863,17 @@ v_cvt_u32_f64_e64 v5, ttmp[10:11]
v_cvt_u32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x55,0xd1,0x7e,0x00,0x00,0x00]
-v_cvt_u32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x55,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_u32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x55,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x55,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x55,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_u32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x55,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_u32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x55,0xd1,0x01,0x01,0x00,0x00]
@@ -26015,8 +26106,17 @@ v_trunc_f64_e64 v[5:6], ttmp[10:11]
v_trunc_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x57,0xd1,0x7e,0x00,0x00,0x00]
-v_trunc_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x57,0xd1,0xfd,0x00,0x00,0x00]
+v_trunc_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x57,0xd1,0x80,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x57,0xd1,0xc1,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x57,0xd1,0xf0,0x00,0x00,0x00]
+
+v_trunc_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x57,0xd1,0xf7,0x00,0x00,0x00]
v_trunc_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x57,0xd1,0x01,0x01,0x00,0x00]
@@ -26126,8 +26226,17 @@ v_ceil_f64_e64 v[5:6], ttmp[10:11]
v_ceil_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x58,0xd1,0x7e,0x00,0x00,0x00]
-v_ceil_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x58,0xd1,0xfd,0x00,0x00,0x00]
+v_ceil_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x58,0xd1,0x80,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x58,0xd1,0xc1,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x58,0xd1,0xf0,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x58,0xd1,0xf7,0x00,0x00,0x00]
v_ceil_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x58,0xd1,0x01,0x01,0x00,0x00]
@@ -26240,11 +26349,14 @@ v_rndne_f64_e64 v[5:6], exec
v_rndne_f64_e64 v[5:6], 0
// CHECK: [0x05,0x00,0x59,0xd1,0x80,0x00,0x00,0x00]
+v_rndne_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x59,0xd1,0xc1,0x00,0x00,0x00]
+
v_rndne_f64_e64 v[5:6], 0.5
// CHECK: [0x05,0x00,0x59,0xd1,0xf0,0x00,0x00,0x00]
-v_rndne_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x59,0xd1,0xfd,0x00,0x00,0x00]
+v_rndne_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x59,0xd1,0xf7,0x00,0x00,0x00]
v_rndne_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x59,0xd1,0x01,0x01,0x00,0x00]
@@ -26354,11 +26466,14 @@ v_floor_f64_e64 v[5:6], exec
v_floor_f64_e64 v[5:6], 0
// CHECK: [0x05,0x00,0x5a,0xd1,0x80,0x00,0x00,0x00]
+v_floor_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x5a,0xd1,0xc1,0x00,0x00,0x00]
+
v_floor_f64_e64 v[5:6], 0.5
// CHECK: [0x05,0x00,0x5a,0xd1,0xf0,0x00,0x00,0x00]
-v_floor_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x5a,0xd1,0xfd,0x00,0x00,0x00]
+v_floor_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x5a,0xd1,0xf7,0x00,0x00,0x00]
v_floor_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x5a,0xd1,0x01,0x01,0x00,0x00]
@@ -26498,11 +26613,14 @@ v_fract_f32_e64 v5, exec_hi
v_fract_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x5b,0xd1,0x80,0x00,0x00,0x00]
+v_fract_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5b,0xd1,0xc1,0x00,0x00,0x00]
+
v_fract_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x5b,0xd1,0xf0,0x00,0x00,0x00]
-v_fract_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5b,0xd1,0xfd,0x00,0x00,0x00]
+v_fract_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5b,0xd1,0xf7,0x00,0x00,0x00]
v_fract_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5b,0xd1,0x01,0x01,0x00,0x00]
@@ -26642,11 +26760,14 @@ v_trunc_f32_e64 v5, exec_hi
v_trunc_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00]
+v_trunc_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5c,0xd1,0xc1,0x00,0x00,0x00]
+
v_trunc_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x5c,0xd1,0xf0,0x00,0x00,0x00]
-v_trunc_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5c,0xd1,0xfd,0x00,0x00,0x00]
+v_trunc_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5c,0xd1,0xf7,0x00,0x00,0x00]
v_trunc_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5c,0xd1,0x01,0x01,0x00,0x00]
@@ -26786,11 +26907,14 @@ v_ceil_f32_e64 v5, exec_hi
v_ceil_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x5d,0xd1,0x80,0x00,0x00,0x00]
+v_ceil_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5d,0xd1,0xc1,0x00,0x00,0x00]
+
v_ceil_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x5d,0xd1,0xf0,0x00,0x00,0x00]
-v_ceil_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5d,0xd1,0xfd,0x00,0x00,0x00]
+v_ceil_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5d,0xd1,0xf7,0x00,0x00,0x00]
v_ceil_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5d,0xd1,0x01,0x01,0x00,0x00]
@@ -26930,11 +27054,14 @@ v_rndne_f32_e64 v5, exec_hi
v_rndne_f32_e64 v5, 0
// CHECK: [0x05,0x00,0x5e,0xd1,0x80,0x00,0x00,0x00]
+v_rndne_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5e,0xd1,0xc1,0x00,0x00,0x00]
+
v_rndne_f32_e64 v5, 0.5
// CHECK: [0x05,0x00,0x5e,0xd1,0xf0,0x00,0x00,0x00]
-v_rndne_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5e,0xd1,0xfd,0x00,0x00,0x00]
+v_rndne_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5e,0xd1,0xf7,0x00,0x00,0x00]
v_rndne_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5e,0xd1,0x01,0x01,0x00,0x00]
@@ -27071,8 +27198,17 @@ v_floor_f32_e64 v5, exec_lo
v_floor_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x5f,0xd1,0x7f,0x00,0x00,0x00]
-v_floor_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x5f,0xd1,0xfd,0x00,0x00,0x00]
+v_floor_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x5f,0xd1,0x80,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x5f,0xd1,0xc1,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x5f,0xd1,0xf0,0x00,0x00,0x00]
+
+v_floor_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x5f,0xd1,0xf7,0x00,0x00,0x00]
v_floor_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x5f,0xd1,0x01,0x01,0x00,0x00]
@@ -27212,8 +27348,17 @@ v_exp_f32_e64 v5, exec_lo
v_exp_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x60,0xd1,0x7f,0x00,0x00,0x00]
-v_exp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x60,0xd1,0xfd,0x00,0x00,0x00]
+v_exp_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x60,0xd1,0x80,0x00,0x00,0x00]
+
+v_exp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x60,0xd1,0xc1,0x00,0x00,0x00]
+
+v_exp_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x60,0xd1,0xf0,0x00,0x00,0x00]
+
+v_exp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x60,0xd1,0xf7,0x00,0x00,0x00]
v_exp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x60,0xd1,0x01,0x01,0x00,0x00]
@@ -27353,8 +27498,17 @@ v_log_f32_e64 v5, exec_lo
v_log_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x61,0xd1,0x7f,0x00,0x00,0x00]
-v_log_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x61,0xd1,0xfd,0x00,0x00,0x00]
+v_log_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x61,0xd1,0x80,0x00,0x00,0x00]
+
+v_log_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x61,0xd1,0xc1,0x00,0x00,0x00]
+
+v_log_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x61,0xd1,0xf0,0x00,0x00,0x00]
+
+v_log_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x61,0xd1,0xf7,0x00,0x00,0x00]
v_log_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x61,0xd1,0x01,0x01,0x00,0x00]
@@ -27494,8 +27648,17 @@ v_rcp_f32_e64 v5, exec_lo
v_rcp_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x62,0xd1,0x7f,0x00,0x00,0x00]
-v_rcp_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x62,0xd1,0xfd,0x00,0x00,0x00]
+v_rcp_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x62,0xd1,0x80,0x00,0x00,0x00]
+
+v_rcp_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x62,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rcp_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x62,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rcp_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x62,0xd1,0xf7,0x00,0x00,0x00]
v_rcp_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x62,0xd1,0x01,0x01,0x00,0x00]
@@ -27635,8 +27798,17 @@ v_rcp_iflag_f32_e64 v5, exec_lo
v_rcp_iflag_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x63,0xd1,0x7f,0x00,0x00,0x00]
-v_rcp_iflag_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x63,0xd1,0xfd,0x00,0x00,0x00]
+v_rcp_iflag_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x63,0xd1,0x80,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x63,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x63,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rcp_iflag_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x63,0xd1,0xf7,0x00,0x00,0x00]
v_rcp_iflag_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x63,0xd1,0x01,0x01,0x00,0x00]
@@ -27776,8 +27948,17 @@ v_rsq_f32_e64 v5, exec_lo
v_rsq_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x64,0xd1,0x7f,0x00,0x00,0x00]
-v_rsq_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x64,0xd1,0xfd,0x00,0x00,0x00]
+v_rsq_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x64,0xd1,0x80,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x64,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x64,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rsq_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x64,0xd1,0xf7,0x00,0x00,0x00]
v_rsq_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x64,0xd1,0x01,0x01,0x00,0x00]
@@ -27887,8 +28068,17 @@ v_rcp_f64_e64 v[5:6], ttmp[10:11]
v_rcp_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x65,0xd1,0x7e,0x00,0x00,0x00]
-v_rcp_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x65,0xd1,0xfd,0x00,0x00,0x00]
+v_rcp_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x65,0xd1,0x80,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x65,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x65,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rcp_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x65,0xd1,0xf7,0x00,0x00,0x00]
v_rcp_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x65,0xd1,0x01,0x01,0x00,0x00]
@@ -27998,8 +28188,17 @@ v_rsq_f64_e64 v[5:6], ttmp[10:11]
v_rsq_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x66,0xd1,0x7e,0x00,0x00,0x00]
-v_rsq_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x66,0xd1,0xfd,0x00,0x00,0x00]
+v_rsq_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x66,0xd1,0x80,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x66,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x66,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rsq_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x66,0xd1,0xf7,0x00,0x00,0x00]
v_rsq_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x66,0xd1,0x01,0x01,0x00,0x00]
@@ -28139,8 +28338,17 @@ v_sqrt_f32_e64 v5, exec_lo
v_sqrt_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x67,0xd1,0x7f,0x00,0x00,0x00]
-v_sqrt_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x67,0xd1,0xfd,0x00,0x00,0x00]
+v_sqrt_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x67,0xd1,0x80,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x67,0xd1,0xc1,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x67,0xd1,0xf0,0x00,0x00,0x00]
+
+v_sqrt_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x67,0xd1,0xf7,0x00,0x00,0x00]
v_sqrt_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x67,0xd1,0x01,0x01,0x00,0x00]
@@ -28250,8 +28458,17 @@ v_sqrt_f64_e64 v[5:6], ttmp[10:11]
v_sqrt_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x68,0xd1,0x7e,0x00,0x00,0x00]
-v_sqrt_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x68,0xd1,0xfd,0x00,0x00,0x00]
+v_sqrt_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x68,0xd1,0x80,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x68,0xd1,0xc1,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x68,0xd1,0xf0,0x00,0x00,0x00]
+
+v_sqrt_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x68,0xd1,0xf7,0x00,0x00,0x00]
v_sqrt_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x68,0xd1,0x01,0x01,0x00,0x00]
@@ -28391,8 +28608,17 @@ v_sin_f32_e64 v5, exec_lo
v_sin_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x69,0xd1,0x7f,0x00,0x00,0x00]
-v_sin_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x69,0xd1,0xfd,0x00,0x00,0x00]
+v_sin_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x69,0xd1,0x80,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x69,0xd1,0xc1,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x69,0xd1,0xf0,0x00,0x00,0x00]
+
+v_sin_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x69,0xd1,0xf7,0x00,0x00,0x00]
v_sin_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x69,0xd1,0x01,0x01,0x00,0x00]
@@ -28532,8 +28758,17 @@ v_cos_f32_e64 v5, exec_lo
v_cos_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x6a,0xd1,0x7f,0x00,0x00,0x00]
-v_cos_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x6a,0xd1,0xfd,0x00,0x00,0x00]
+v_cos_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x6a,0xd1,0x80,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x6a,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x6a,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x6a,0xd1,0xf7,0x00,0x00,0x00]
v_cos_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x6a,0xd1,0x01,0x01,0x00,0x00]
@@ -29303,8 +29538,17 @@ v_frexp_exp_i32_f64_e64 v5, ttmp[10:11]
v_frexp_exp_i32_f64_e64 v5, exec
// CHECK: [0x05,0x00,0x70,0xd1,0x7e,0x00,0x00,0x00]
-v_frexp_exp_i32_f64_e64 v5, scc
-// CHECK: [0x05,0x00,0x70,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_exp_i32_f64_e64 v5, 0
+// CHECK: [0x05,0x00,0x70,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, -1
+// CHECK: [0x05,0x00,0x70,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x70,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f64_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x70,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_exp_i32_f64_e64 v5, v[1:2]
// CHECK: [0x05,0x00,0x70,0xd1,0x01,0x01,0x00,0x00]
@@ -29405,8 +29649,17 @@ v_frexp_mant_f64_e64 v[5:6], ttmp[10:11]
v_frexp_mant_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x71,0xd1,0x7e,0x00,0x00,0x00]
-v_frexp_mant_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x71,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_mant_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x71,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x71,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x71,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_mant_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x71,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_mant_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x71,0xd1,0x01,0x01,0x00,0x00]
@@ -29516,8 +29769,17 @@ v_fract_f64_e64 v[5:6], ttmp[10:11]
v_fract_f64_e64 v[5:6], exec
// CHECK: [0x05,0x00,0x72,0xd1,0x7e,0x00,0x00,0x00]
-v_fract_f64_e64 v[5:6], scc
-// CHECK: [0x05,0x00,0x72,0xd1,0xfd,0x00,0x00,0x00]
+v_fract_f64_e64 v[5:6], 0
+// CHECK: [0x05,0x00,0x72,0xd1,0x80,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], -1
+// CHECK: [0x05,0x00,0x72,0xd1,0xc1,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], 0.5
+// CHECK: [0x05,0x00,0x72,0xd1,0xf0,0x00,0x00,0x00]
+
+v_fract_f64_e64 v[5:6], -4.0
+// CHECK: [0x05,0x00,0x72,0xd1,0xf7,0x00,0x00,0x00]
v_fract_f64_e64 v[5:6], v[1:2]
// CHECK: [0x05,0x00,0x72,0xd1,0x01,0x01,0x00,0x00]
@@ -29657,8 +29919,17 @@ v_frexp_exp_i32_f32_e64 v5, exec_lo
v_frexp_exp_i32_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x73,0xd1,0x7f,0x00,0x00,0x00]
-v_frexp_exp_i32_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x73,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_exp_i32_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x73,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x73,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x73,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_exp_i32_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x73,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_exp_i32_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x73,0xd1,0x01,0x01,0x00,0x00]
@@ -29789,8 +30060,17 @@ v_frexp_mant_f32_e64 v5, exec_lo
v_frexp_mant_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x74,0xd1,0x7f,0x00,0x00,0x00]
-v_frexp_mant_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x74,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_mant_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x74,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_mant_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x74,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_mant_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x74,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_mant_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x74,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_mant_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x74,0xd1,0x01,0x01,0x00,0x00]
@@ -30284,8 +30564,17 @@ v_cvt_u16_f16_e64 v5, exec_lo
v_cvt_u16_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x7b,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_u16_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x7b,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_u16_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x7b,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_u16_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x7b,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_u16_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x7b,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_u16_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7b,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_u16_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x7b,0xd1,0x01,0x01,0x00,0x00]
@@ -30416,8 +30705,17 @@ v_cvt_i16_f16_e64 v5, exec_lo
v_cvt_i16_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x7c,0xd1,0x7f,0x00,0x00,0x00]
-v_cvt_i16_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x7c,0xd1,0xfd,0x00,0x00,0x00]
+v_cvt_i16_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x7c,0xd1,0x80,0x00,0x00,0x00]
+
+v_cvt_i16_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x7c,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cvt_i16_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x7c,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cvt_i16_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7c,0xd1,0xf7,0x00,0x00,0x00]
v_cvt_i16_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x7c,0xd1,0x01,0x01,0x00,0x00]
@@ -30548,8 +30846,17 @@ v_rcp_f16_e64 v5, exec_lo
v_rcp_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x7d,0xd1,0x7f,0x00,0x00,0x00]
-v_rcp_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x7d,0xd1,0xfd,0x00,0x00,0x00]
+v_rcp_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x7d,0xd1,0x80,0x00,0x00,0x00]
+
+v_rcp_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x7d,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rcp_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x7d,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rcp_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7d,0xd1,0xf7,0x00,0x00,0x00]
v_rcp_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x7d,0xd1,0x01,0x01,0x00,0x00]
@@ -30680,8 +30987,17 @@ v_sqrt_f16_e64 v5, exec_lo
v_sqrt_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x7e,0xd1,0x7f,0x00,0x00,0x00]
-v_sqrt_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x7e,0xd1,0xfd,0x00,0x00,0x00]
+v_sqrt_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x7e,0xd1,0x80,0x00,0x00,0x00]
+
+v_sqrt_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x7e,0xd1,0xc1,0x00,0x00,0x00]
+
+v_sqrt_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x7e,0xd1,0xf0,0x00,0x00,0x00]
+
+v_sqrt_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7e,0xd1,0xf7,0x00,0x00,0x00]
v_sqrt_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x7e,0xd1,0x01,0x01,0x00,0x00]
@@ -30812,8 +31128,17 @@ v_rsq_f16_e64 v5, exec_lo
v_rsq_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x7f,0xd1,0x7f,0x00,0x00,0x00]
-v_rsq_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x7f,0xd1,0xfd,0x00,0x00,0x00]
+v_rsq_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x7f,0xd1,0x80,0x00,0x00,0x00]
+
+v_rsq_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x7f,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rsq_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x7f,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rsq_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x7f,0xd1,0xf7,0x00,0x00,0x00]
v_rsq_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x7f,0xd1,0x01,0x01,0x00,0x00]
@@ -30944,8 +31269,17 @@ v_log_f16_e64 v5, exec_lo
v_log_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x80,0xd1,0x7f,0x00,0x00,0x00]
-v_log_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x80,0xd1,0xfd,0x00,0x00,0x00]
+v_log_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x80,0xd1,0x80,0x00,0x00,0x00]
+
+v_log_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x80,0xd1,0xc1,0x00,0x00,0x00]
+
+v_log_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x80,0xd1,0xf0,0x00,0x00,0x00]
+
+v_log_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x80,0xd1,0xf7,0x00,0x00,0x00]
v_log_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x80,0xd1,0x01,0x01,0x00,0x00]
@@ -31076,8 +31410,17 @@ v_exp_f16_e64 v5, exec_lo
v_exp_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x81,0xd1,0x7f,0x00,0x00,0x00]
-v_exp_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x81,0xd1,0xfd,0x00,0x00,0x00]
+v_exp_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x81,0xd1,0x80,0x00,0x00,0x00]
+
+v_exp_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x81,0xd1,0xc1,0x00,0x00,0x00]
+
+v_exp_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x81,0xd1,0xf0,0x00,0x00,0x00]
+
+v_exp_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x81,0xd1,0xf7,0x00,0x00,0x00]
v_exp_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x81,0xd1,0x01,0x01,0x00,0x00]
@@ -31208,8 +31551,17 @@ v_frexp_mant_f16_e64 v5, exec_lo
v_frexp_mant_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x82,0xd1,0x7f,0x00,0x00,0x00]
-v_frexp_mant_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x82,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_mant_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x82,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_mant_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x82,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_mant_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x82,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_mant_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x82,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_mant_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x82,0xd1,0x01,0x01,0x00,0x00]
@@ -31340,8 +31692,17 @@ v_frexp_exp_i16_f16_e64 v5, exec_lo
v_frexp_exp_i16_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x83,0xd1,0x7f,0x00,0x00,0x00]
-v_frexp_exp_i16_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x83,0xd1,0xfd,0x00,0x00,0x00]
+v_frexp_exp_i16_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x83,0xd1,0x80,0x00,0x00,0x00]
+
+v_frexp_exp_i16_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x83,0xd1,0xc1,0x00,0x00,0x00]
+
+v_frexp_exp_i16_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x83,0xd1,0xf0,0x00,0x00,0x00]
+
+v_frexp_exp_i16_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x83,0xd1,0xf7,0x00,0x00,0x00]
v_frexp_exp_i16_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x83,0xd1,0x01,0x01,0x00,0x00]
@@ -31472,8 +31833,17 @@ v_floor_f16_e64 v5, exec_lo
v_floor_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x84,0xd1,0x7f,0x00,0x00,0x00]
-v_floor_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x84,0xd1,0xfd,0x00,0x00,0x00]
+v_floor_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x84,0xd1,0x80,0x00,0x00,0x00]
+
+v_floor_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x84,0xd1,0xc1,0x00,0x00,0x00]
+
+v_floor_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x84,0xd1,0xf0,0x00,0x00,0x00]
+
+v_floor_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x84,0xd1,0xf7,0x00,0x00,0x00]
v_floor_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x84,0xd1,0x01,0x01,0x00,0x00]
@@ -31604,8 +31974,17 @@ v_ceil_f16_e64 v5, exec_lo
v_ceil_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x85,0xd1,0x7f,0x00,0x00,0x00]
-v_ceil_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x85,0xd1,0xfd,0x00,0x00,0x00]
+v_ceil_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x85,0xd1,0x80,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x85,0xd1,0xc1,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x85,0xd1,0xf0,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x85,0xd1,0xf7,0x00,0x00,0x00]
v_ceil_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x85,0xd1,0x01,0x01,0x00,0x00]
@@ -31736,8 +32115,17 @@ v_trunc_f16_e64 v5, exec_lo
v_trunc_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x86,0xd1,0x7f,0x00,0x00,0x00]
-v_trunc_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x86,0xd1,0xfd,0x00,0x00,0x00]
+v_trunc_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x86,0xd1,0x80,0x00,0x00,0x00]
+
+v_trunc_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x86,0xd1,0xc1,0x00,0x00,0x00]
+
+v_trunc_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x86,0xd1,0xf0,0x00,0x00,0x00]
+
+v_trunc_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x86,0xd1,0xf7,0x00,0x00,0x00]
v_trunc_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x86,0xd1,0x01,0x01,0x00,0x00]
@@ -31868,8 +32256,17 @@ v_rndne_f16_e64 v5, exec_lo
v_rndne_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x87,0xd1,0x7f,0x00,0x00,0x00]
-v_rndne_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x87,0xd1,0xfd,0x00,0x00,0x00]
+v_rndne_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x87,0xd1,0x80,0x00,0x00,0x00]
+
+v_rndne_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x87,0xd1,0xc1,0x00,0x00,0x00]
+
+v_rndne_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x87,0xd1,0xf0,0x00,0x00,0x00]
+
+v_rndne_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x87,0xd1,0xf7,0x00,0x00,0x00]
v_rndne_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x87,0xd1,0x01,0x01,0x00,0x00]
@@ -32000,8 +32397,17 @@ v_fract_f16_e64 v5, exec_lo
v_fract_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x88,0xd1,0x7f,0x00,0x00,0x00]
-v_fract_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x88,0xd1,0xfd,0x00,0x00,0x00]
+v_fract_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x88,0xd1,0x80,0x00,0x00,0x00]
+
+v_fract_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x88,0xd1,0xc1,0x00,0x00,0x00]
+
+v_fract_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x88,0xd1,0xf0,0x00,0x00,0x00]
+
+v_fract_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x88,0xd1,0xf7,0x00,0x00,0x00]
v_fract_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x88,0xd1,0x01,0x01,0x00,0x00]
@@ -32132,8 +32538,17 @@ v_sin_f16_e64 v5, exec_lo
v_sin_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x89,0xd1,0x7f,0x00,0x00,0x00]
-v_sin_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x89,0xd1,0xfd,0x00,0x00,0x00]
+v_sin_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x89,0xd1,0x80,0x00,0x00,0x00]
+
+v_sin_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x89,0xd1,0xc1,0x00,0x00,0x00]
+
+v_sin_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x89,0xd1,0xf0,0x00,0x00,0x00]
+
+v_sin_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x89,0xd1,0xf7,0x00,0x00,0x00]
v_sin_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x89,0xd1,0x01,0x01,0x00,0x00]
@@ -32264,8 +32679,17 @@ v_cos_f16_e64 v5, exec_lo
v_cos_f16_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x8a,0xd1,0x7f,0x00,0x00,0x00]
-v_cos_f16_e64 v5, scc
-// CHECK: [0x05,0x00,0x8a,0xd1,0xfd,0x00,0x00,0x00]
+v_cos_f16_e64 v5, 0
+// CHECK: [0x05,0x00,0x8a,0xd1,0x80,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, -1
+// CHECK: [0x05,0x00,0x8a,0xd1,0xc1,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x8a,0xd1,0xf0,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x8a,0xd1,0xf7,0x00,0x00,0x00]
v_cos_f16_e64 v5, v1
// CHECK: [0x05,0x00,0x8a,0xd1,0x01,0x01,0x00,0x00]
@@ -32396,8 +32820,17 @@ v_exp_legacy_f32_e64 v5, exec_lo
v_exp_legacy_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x8b,0xd1,0x7f,0x00,0x00,0x00]
-v_exp_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x8b,0xd1,0xfd,0x00,0x00,0x00]
+v_exp_legacy_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x8b,0xd1,0x80,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x8b,0xd1,0xc1,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x8b,0xd1,0xf0,0x00,0x00,0x00]
+
+v_exp_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x8b,0xd1,0xf7,0x00,0x00,0x00]
v_exp_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x8b,0xd1,0x01,0x01,0x00,0x00]
@@ -32537,8 +32970,17 @@ v_log_legacy_f32_e64 v5, exec_lo
v_log_legacy_f32_e64 v5, exec_hi
// CHECK: [0x05,0x00,0x8c,0xd1,0x7f,0x00,0x00,0x00]
-v_log_legacy_f32_e64 v5, scc
-// CHECK: [0x05,0x00,0x8c,0xd1,0xfd,0x00,0x00,0x00]
+v_log_legacy_f32_e64 v5, 0
+// CHECK: [0x05,0x00,0x8c,0xd1,0x80,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, -1
+// CHECK: [0x05,0x00,0x8c,0xd1,0xc1,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, 0.5
+// CHECK: [0x05,0x00,0x8c,0xd1,0xf0,0x00,0x00,0x00]
+
+v_log_legacy_f32_e64 v5, -4.0
+// CHECK: [0x05,0x00,0x8c,0xd1,0xf7,0x00,0x00,0x00]
v_log_legacy_f32_e64 v5, v1
// CHECK: [0x05,0x00,0x8c,0xd1,0x01,0x01,0x00,0x00]
@@ -32717,92 +33159,113 @@ v_add_f32 v5, v255, v2
v_add_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x02]
+v_add_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x00]
+
+v_add_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x01,0xd1,0x80,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x01,0xd1,0xc1,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x01,0xd1,0xf0,0x04,0x00,0x00]
+
+v_add_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x01,0xd1,0xf7,0x04,0x00,0x00]
+
v_add_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x00]
-v_add_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x01,0xd1,0x01,0x05,0x00,0x00]
-
v_add_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x01,0xd1,0xff,0x05,0x00,0x00]
-v_add_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xcb,0x00,0x00]
+v_add_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xca,0x00,0x00]
+
+v_add_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xcc,0x00,0x00]
-v_add_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xcd,0x00,0x00]
+v_add_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xce,0x00,0x00]
-v_add_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xcf,0x00,0x00]
+v_add_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xd4,0x00,0x00]
-v_add_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xd5,0x00,0x00]
+v_add_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xd6,0x00,0x00]
-v_add_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xd7,0x00,0x00]
+v_add_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xd8,0x00,0x00]
-v_add_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xd9,0x00,0x00]
+v_add_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xda,0x00,0x00]
-v_add_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xdb,0x00,0x00]
+v_add_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xdc,0x00,0x00]
-v_add_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xdd,0x00,0x00]
+v_add_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xde,0x00,0x00]
-v_add_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xdf,0x00,0x00]
+v_add_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xf6,0x00,0x00]
-v_add_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xf7,0x00,0x00]
+v_add_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xf8,0x00,0x00]
-v_add_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xf9,0x00,0x00]
+v_add_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xfc,0x00,0x00]
-v_add_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xfd,0x00,0x00]
+v_add_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xfe,0x00,0x00]
-v_add_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xff,0x00,0x00]
+v_add_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x00,0x01,0x00]
-v_add_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xfb,0x01,0x00]
+v_add_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x82,0x01,0x00]
-v_add_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x02,0x00]
+v_add_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xe0,0x01,0x00]
-v_add_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0xff,0x03,0x00]
+v_add_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xee,0x01,0x00]
-v_add_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x20]
+v_add_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x02,0x00]
-v_add_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x40]
+v_add_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0xfe,0x03,0x00]
-v_add_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x60]
+v_add_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x20]
-v_add_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x01,0xd1,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x40]
-v_add_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x01,0xd1,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x60]
-v_add_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x01,0xd1,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x01,0xd1,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x01,0xd1,0x01,0x05,0x00,0x00]
+v_add_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x01,0xd1,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x08]
+v_add_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x01,0xd1,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x10]
+v_add_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x01,0xd1,0x80,0x04,0x00,0x00]
-v_add_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x01,0xd1,0x01,0x05,0x00,0x18]
+v_add_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x08]
+
+v_add_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x10]
+
+v_add_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x01,0xd1,0x80,0x04,0x00,0x18]
v_sub_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x04]
@@ -32876,92 +33339,113 @@ v_sub_f32 v5, v255, v2
v_sub_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x04]
+v_sub_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x00]
+
+v_sub_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x02,0xd1,0x80,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x02,0xd1,0xc1,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x02,0xd1,0xf0,0x04,0x00,0x00]
+
+v_sub_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x02,0xd1,0xf7,0x04,0x00,0x00]
+
v_sub_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x00]
-v_sub_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x02,0xd1,0x01,0x05,0x00,0x00]
-
v_sub_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x02,0xd1,0xff,0x05,0x00,0x00]
-v_sub_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xcb,0x00,0x00]
+v_sub_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xca,0x00,0x00]
+
+v_sub_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xcc,0x00,0x00]
-v_sub_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xcd,0x00,0x00]
+v_sub_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xce,0x00,0x00]
-v_sub_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xcf,0x00,0x00]
+v_sub_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xd4,0x00,0x00]
-v_sub_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xd5,0x00,0x00]
+v_sub_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xd6,0x00,0x00]
-v_sub_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xd7,0x00,0x00]
+v_sub_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xd8,0x00,0x00]
-v_sub_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xd9,0x00,0x00]
+v_sub_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xda,0x00,0x00]
-v_sub_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xdb,0x00,0x00]
+v_sub_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xdc,0x00,0x00]
-v_sub_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xdd,0x00,0x00]
+v_sub_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xde,0x00,0x00]
-v_sub_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xdf,0x00,0x00]
+v_sub_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xf6,0x00,0x00]
-v_sub_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xf7,0x00,0x00]
+v_sub_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xf8,0x00,0x00]
-v_sub_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xf9,0x00,0x00]
+v_sub_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xfc,0x00,0x00]
-v_sub_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xfd,0x00,0x00]
+v_sub_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xfe,0x00,0x00]
-v_sub_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xff,0x00,0x00]
+v_sub_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x00,0x01,0x00]
-v_sub_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xfb,0x01,0x00]
+v_sub_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x82,0x01,0x00]
-v_sub_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x02,0x00]
+v_sub_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xe0,0x01,0x00]
-v_sub_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0xff,0x03,0x00]
+v_sub_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xee,0x01,0x00]
-v_sub_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x20]
+v_sub_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x02,0x00]
-v_sub_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x40]
+v_sub_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0xfe,0x03,0x00]
-v_sub_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x60]
+v_sub_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x20]
-v_sub_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x02,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x40]
-v_sub_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x02,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x60]
-v_sub_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x02,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x02,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x02,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x02,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x08]
+v_sub_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x02,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x10]
+v_sub_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x02,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x02,0xd1,0x01,0x05,0x00,0x18]
+v_sub_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x08]
+
+v_sub_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x10]
+
+v_sub_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x02,0xd1,0x80,0x04,0x00,0x18]
v_subrev_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x06]
@@ -33035,92 +33519,113 @@ v_subrev_f32 v5, v255, v2
v_subrev_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x06]
+v_subrev_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x03,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x03,0xd1,0xc1,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x03,0xd1,0xf0,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x03,0xd1,0xf7,0x04,0x00,0x00]
+
v_subrev_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x00]
-v_subrev_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x03,0xd1,0x01,0x05,0x00,0x00]
-
v_subrev_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x03,0xd1,0xff,0x05,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xcb,0x00,0x00]
+v_subrev_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xca,0x00,0x00]
-v_subrev_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xcd,0x00,0x00]
+v_subrev_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xcc,0x00,0x00]
-v_subrev_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xcf,0x00,0x00]
+v_subrev_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xce,0x00,0x00]
-v_subrev_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xd5,0x00,0x00]
+v_subrev_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xd4,0x00,0x00]
-v_subrev_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xd7,0x00,0x00]
+v_subrev_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xd6,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xd9,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xd8,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xdb,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xda,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xdd,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xdc,0x00,0x00]
-v_subrev_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xdf,0x00,0x00]
+v_subrev_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xde,0x00,0x00]
-v_subrev_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xf7,0x00,0x00]
+v_subrev_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xf6,0x00,0x00]
-v_subrev_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xf9,0x00,0x00]
+v_subrev_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xf8,0x00,0x00]
-v_subrev_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xfd,0x00,0x00]
+v_subrev_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xfc,0x00,0x00]
-v_subrev_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xff,0x00,0x00]
+v_subrev_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xfe,0x00,0x00]
-v_subrev_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xfb,0x01,0x00]
+v_subrev_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x00,0x01,0x00]
-v_subrev_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x02,0x00]
+v_subrev_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x82,0x01,0x00]
-v_subrev_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0xff,0x03,0x00]
+v_subrev_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xe0,0x01,0x00]
-v_subrev_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x20]
+v_subrev_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xee,0x01,0x00]
-v_subrev_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x40]
+v_subrev_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x02,0x00]
-v_subrev_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x60]
+v_subrev_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0xfe,0x03,0x00]
-v_subrev_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x03,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x20]
-v_subrev_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x03,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x40]
-v_subrev_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x03,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x60]
-v_subrev_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x03,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x03,0xd1,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x08]
+v_subrev_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x03,0xd1,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x10]
+v_subrev_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x03,0xd1,0x80,0x04,0x00,0x00]
-v_subrev_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x03,0xd1,0x01,0x05,0x00,0x18]
+v_subrev_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x03,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x08]
+
+v_subrev_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x10]
+
+v_subrev_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x03,0xd1,0x80,0x04,0x00,0x18]
v_mul_legacy_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x08]
@@ -33194,92 +33699,113 @@ v_mul_legacy_f32 v5, v255, v2
v_mul_legacy_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x08]
+v_mul_legacy_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x04,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x04,0xd1,0xc1,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x04,0xd1,0xf0,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x04,0xd1,0xf7,0x04,0x00,0x00]
+
v_mul_legacy_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x00]
-v_mul_legacy_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x04,0xd1,0x01,0x05,0x00,0x00]
-
v_mul_legacy_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x04,0xd1,0xff,0x05,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xcb,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xca,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xcd,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xcc,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xcf,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xce,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xd5,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xd4,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xd7,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xd6,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xd9,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xd8,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xdb,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xda,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xdd,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xdc,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xdf,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xde,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xf7,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xf6,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xf9,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xf8,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xfd,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xfc,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xff,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xfe,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xfb,0x01,0x00]
+v_mul_legacy_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x00,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x02,0x00]
+v_mul_legacy_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x82,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0xff,0x03,0x00]
+v_mul_legacy_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xe0,0x01,0x00]
-v_mul_legacy_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x20]
+v_mul_legacy_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xee,0x01,0x00]
-v_mul_legacy_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x40]
+v_mul_legacy_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x02,0x00]
-v_mul_legacy_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x60]
+v_mul_legacy_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0xfe,0x03,0x00]
-v_mul_legacy_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x04,0xd1,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x20]
-v_mul_legacy_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x04,0xd1,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x40]
-v_mul_legacy_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x04,0xd1,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x60]
-v_mul_legacy_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x04,0xd1,0x01,0x05,0x00,0x00]
+v_mul_legacy_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x04,0xd1,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x08]
+v_mul_legacy_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x04,0xd1,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x10]
+v_mul_legacy_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x04,0xd1,0x80,0x04,0x00,0x00]
-v_mul_legacy_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x04,0xd1,0x01,0x05,0x00,0x18]
+v_mul_legacy_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x04,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_legacy_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x08]
+
+v_mul_legacy_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x10]
+
+v_mul_legacy_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x04,0xd1,0x80,0x04,0x00,0x18]
v_mul_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x0a]
@@ -33353,92 +33879,113 @@ v_mul_f32 v5, v255, v2
v_mul_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x0a]
+v_mul_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x05,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x05,0xd1,0xc1,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x05,0xd1,0xf0,0x04,0x00,0x00]
+
+v_mul_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x05,0xd1,0xf7,0x04,0x00,0x00]
+
v_mul_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x00]
-v_mul_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x05,0xd1,0x01,0x05,0x00,0x00]
-
v_mul_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x05,0xd1,0xff,0x05,0x00,0x00]
-v_mul_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xcb,0x00,0x00]
+v_mul_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xca,0x00,0x00]
+
+v_mul_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xcc,0x00,0x00]
+
+v_mul_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xce,0x00,0x00]
-v_mul_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xcd,0x00,0x00]
+v_mul_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xd4,0x00,0x00]
-v_mul_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xcf,0x00,0x00]
+v_mul_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xd6,0x00,0x00]
-v_mul_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xd5,0x00,0x00]
+v_mul_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xd8,0x00,0x00]
-v_mul_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xd7,0x00,0x00]
+v_mul_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xda,0x00,0x00]
-v_mul_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xd9,0x00,0x00]
+v_mul_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xdc,0x00,0x00]
-v_mul_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xdb,0x00,0x00]
+v_mul_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xde,0x00,0x00]
-v_mul_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xdd,0x00,0x00]
+v_mul_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xf6,0x00,0x00]
-v_mul_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xdf,0x00,0x00]
+v_mul_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xf8,0x00,0x00]
-v_mul_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xf7,0x00,0x00]
+v_mul_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xfc,0x00,0x00]
-v_mul_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xf9,0x00,0x00]
+v_mul_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xfe,0x00,0x00]
-v_mul_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xfd,0x00,0x00]
+v_mul_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x00,0x01,0x00]
-v_mul_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xff,0x00,0x00]
+v_mul_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x82,0x01,0x00]
-v_mul_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xfb,0x01,0x00]
+v_mul_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xe0,0x01,0x00]
-v_mul_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x02,0x00]
+v_mul_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xee,0x01,0x00]
-v_mul_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0xff,0x03,0x00]
+v_mul_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x02,0x00]
-v_mul_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x20]
+v_mul_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0xfe,0x03,0x00]
-v_mul_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x40]
+v_mul_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x20]
-v_mul_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x60]
+v_mul_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x40]
-v_mul_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x05,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x60]
-v_mul_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x05,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x05,0xd1,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x05,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x05,0xd1,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x05,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x05,0xd1,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x08]
+v_mul_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x05,0xd1,0x80,0x04,0x00,0x00]
-v_mul_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x10]
+v_mul_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x08]
-v_mul_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x05,0xd1,0x01,0x05,0x00,0x18]
+v_mul_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x10]
+
+v_mul_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x05,0xd1,0x80,0x04,0x00,0x18]
v_mul_i32_i24 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x0c]
@@ -34112,92 +34659,113 @@ v_min_f32 v5, v255, v2
v_min_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x14]
+v_min_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x0a,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0xc1,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0xf0,0x04,0x00,0x00]
+
+v_min_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0xf7,0x04,0x00,0x00]
+
v_min_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x00]
-v_min_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x0a,0xd1,0x01,0x05,0x00,0x00]
-
v_min_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x0a,0xd1,0xff,0x05,0x00,0x00]
-v_min_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xcb,0x00,0x00]
+v_min_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xca,0x00,0x00]
+
+v_min_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xcc,0x00,0x00]
+
+v_min_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xce,0x00,0x00]
+
+v_min_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xd4,0x00,0x00]
-v_min_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xcd,0x00,0x00]
+v_min_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xd6,0x00,0x00]
-v_min_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xcf,0x00,0x00]
+v_min_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xd8,0x00,0x00]
-v_min_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xd5,0x00,0x00]
+v_min_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xda,0x00,0x00]
-v_min_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xd7,0x00,0x00]
+v_min_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xdc,0x00,0x00]
-v_min_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xd9,0x00,0x00]
+v_min_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xde,0x00,0x00]
-v_min_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xdb,0x00,0x00]
+v_min_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xf6,0x00,0x00]
-v_min_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xdd,0x00,0x00]
+v_min_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xf8,0x00,0x00]
-v_min_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xdf,0x00,0x00]
+v_min_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xfc,0x00,0x00]
-v_min_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xf7,0x00,0x00]
+v_min_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xfe,0x00,0x00]
-v_min_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xf9,0x00,0x00]
+v_min_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x00,0x01,0x00]
-v_min_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xfd,0x00,0x00]
+v_min_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x82,0x01,0x00]
-v_min_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xff,0x00,0x00]
+v_min_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xe0,0x01,0x00]
-v_min_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xfb,0x01,0x00]
+v_min_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xee,0x01,0x00]
-v_min_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x02,0x00]
+v_min_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x02,0x00]
-v_min_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0xff,0x03,0x00]
+v_min_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0xfe,0x03,0x00]
-v_min_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x20]
+v_min_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x20]
-v_min_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x40]
+v_min_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x40]
-v_min_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x60]
+v_min_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x60]
-v_min_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x0a,0xd1,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x0a,0xd1,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x0a,0xd1,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x0a,0xd1,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x0a,0xd1,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x0a,0xd1,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x0a,0xd1,0x01,0x05,0x00,0x00]
+v_min_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x0a,0xd1,0x80,0x04,0x00,0x00]
-v_min_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x08]
+v_min_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x08]
-v_min_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x10]
+v_min_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x10]
-v_min_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x0a,0xd1,0x01,0x05,0x00,0x18]
+v_min_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x0a,0xd1,0x80,0x04,0x00,0x18]
v_max_f32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x16]
@@ -34271,92 +34839,113 @@ v_max_f32 v5, v255, v2
v_max_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x16]
+v_max_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x0b,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0xc1,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0xf0,0x04,0x00,0x00]
+
+v_max_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0xf7,0x04,0x00,0x00]
+
v_max_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x00]
-v_max_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x0b,0xd1,0x01,0x05,0x00,0x00]
-
v_max_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x0b,0xd1,0xff,0x05,0x00,0x00]
-v_max_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xcb,0x00,0x00]
+v_max_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xca,0x00,0x00]
+
+v_max_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xcc,0x00,0x00]
+
+v_max_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xce,0x00,0x00]
+
+v_max_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xd4,0x00,0x00]
-v_max_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xcd,0x00,0x00]
+v_max_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xd6,0x00,0x00]
-v_max_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xcf,0x00,0x00]
+v_max_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xd8,0x00,0x00]
-v_max_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xd5,0x00,0x00]
+v_max_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xda,0x00,0x00]
-v_max_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xd7,0x00,0x00]
+v_max_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xdc,0x00,0x00]
-v_max_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xd9,0x00,0x00]
+v_max_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xde,0x00,0x00]
-v_max_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xdb,0x00,0x00]
+v_max_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xf6,0x00,0x00]
-v_max_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xdd,0x00,0x00]
+v_max_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xf8,0x00,0x00]
-v_max_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xdf,0x00,0x00]
+v_max_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xfc,0x00,0x00]
-v_max_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xf7,0x00,0x00]
+v_max_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xfe,0x00,0x00]
-v_max_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xf9,0x00,0x00]
+v_max_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x00,0x01,0x00]
-v_max_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xfd,0x00,0x00]
+v_max_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x82,0x01,0x00]
-v_max_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xff,0x00,0x00]
+v_max_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xe0,0x01,0x00]
-v_max_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xfb,0x01,0x00]
+v_max_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xee,0x01,0x00]
-v_max_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x02,0x00]
+v_max_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x02,0x00]
-v_max_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0xff,0x03,0x00]
+v_max_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0xfe,0x03,0x00]
-v_max_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x20]
+v_max_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x20]
-v_max_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x40]
+v_max_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x40]
-v_max_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x60]
+v_max_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x60]
-v_max_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x0b,0xd1,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x0b,0xd1,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x0b,0xd1,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x0b,0xd1,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x0b,0xd1,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x0b,0xd1,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x0b,0xd1,0x01,0x05,0x00,0x00]
+v_max_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x0b,0xd1,0x80,0x04,0x00,0x00]
-v_max_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x08]
+v_max_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x08]
-v_max_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x10]
+v_max_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x10]
-v_max_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x0b,0xd1,0x01,0x05,0x00,0x18]
+v_max_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x0b,0xd1,0x80,0x04,0x00,0x18]
v_min_i32 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x18]
@@ -35930,92 +36519,113 @@ v_mac_f32 v5, v255, v2
v_mac_f32 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x2c]
+v_mac_f32_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f32_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x16,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x16,0xd1,0xc1,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x16,0xd1,0xf0,0x04,0x00,0x00]
+
+v_mac_f32_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x16,0xd1,0xf7,0x04,0x00,0x00]
+
v_mac_f32_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x00]
-v_mac_f32_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x16,0xd1,0x01,0x05,0x00,0x00]
-
v_mac_f32_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x16,0xd1,0xff,0x05,0x00,0x00]
-v_mac_f32_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xcb,0x00,0x00]
+v_mac_f32_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xca,0x00,0x00]
+
+v_mac_f32_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xcc,0x00,0x00]
+
+v_mac_f32_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xce,0x00,0x00]
+
+v_mac_f32_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xd4,0x00,0x00]
-v_mac_f32_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xcd,0x00,0x00]
+v_mac_f32_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xd6,0x00,0x00]
-v_mac_f32_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xcf,0x00,0x00]
+v_mac_f32_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xd8,0x00,0x00]
-v_mac_f32_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xd5,0x00,0x00]
+v_mac_f32_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xda,0x00,0x00]
-v_mac_f32_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xd7,0x00,0x00]
+v_mac_f32_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xdc,0x00,0x00]
-v_mac_f32_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xd9,0x00,0x00]
+v_mac_f32_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xde,0x00,0x00]
-v_mac_f32_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xdb,0x00,0x00]
+v_mac_f32_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xf6,0x00,0x00]
-v_mac_f32_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xdd,0x00,0x00]
+v_mac_f32_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xf8,0x00,0x00]
-v_mac_f32_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xdf,0x00,0x00]
+v_mac_f32_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xfc,0x00,0x00]
-v_mac_f32_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xf7,0x00,0x00]
+v_mac_f32_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xfe,0x00,0x00]
-v_mac_f32_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xf9,0x00,0x00]
+v_mac_f32_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x00,0x01,0x00]
-v_mac_f32_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xfd,0x00,0x00]
+v_mac_f32_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x82,0x01,0x00]
-v_mac_f32_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xff,0x00,0x00]
+v_mac_f32_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xe0,0x01,0x00]
-v_mac_f32_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xfb,0x01,0x00]
+v_mac_f32_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xee,0x01,0x00]
-v_mac_f32_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x02,0x00]
+v_mac_f32_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x02,0x00]
-v_mac_f32_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0xff,0x03,0x00]
+v_mac_f32_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0xfe,0x03,0x00]
-v_mac_f32_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x20]
+v_mac_f32_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x20]
-v_mac_f32_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x40]
+v_mac_f32_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x40]
-v_mac_f32_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x60]
+v_mac_f32_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x60]
-v_mac_f32_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x16,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x16,0xd1,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x16,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x16,0xd1,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x16,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x16,0xd1,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x16,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f32_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x16,0xd1,0x80,0x04,0x00,0x00]
-v_mac_f32_e64 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x08]
+v_mac_f32_e64 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x08]
-v_mac_f32_e64 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x10]
+v_mac_f32_e64 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x10]
-v_mac_f32_e64 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x16,0xd1,0x01,0x05,0x00,0x18]
+v_mac_f32_e64 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x16,0xd1,0x80,0x04,0x00,0x18]
v_madmk_f32 v5, 0, 0x11213141, v3
// CHECK: [0x80,0x06,0x0a,0x2e,0x41,0x31,0x21,0x11]
@@ -36962,83 +37572,104 @@ v_add_f16 v5, v255, v2
v_add_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x3e]
+v_add_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x04,0x00,0x00]
+
+v_add_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x1f,0xd1,0x80,0x04,0x00,0x00]
+
+v_add_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0xc1,0x04,0x00,0x00]
+
+v_add_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0xf0,0x04,0x00,0x00]
+
+v_add_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0xf7,0x04,0x00,0x00]
+
v_add_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0x05,0x00,0x00]
-v_add_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x1f,0xd1,0x01,0x05,0x00,0x00]
-
v_add_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x1f,0xd1,0xff,0x05,0x00,0x00]
-v_add_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xcb,0x00,0x00]
+v_add_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xca,0x00,0x00]
+
+v_add_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xcc,0x00,0x00]
+
+v_add_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xce,0x00,0x00]
+
+v_add_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xd4,0x00,0x00]
-v_add_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xcd,0x00,0x00]
+v_add_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xd6,0x00,0x00]
-v_add_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xcf,0x00,0x00]
+v_add_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xd8,0x00,0x00]
-v_add_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xd5,0x00,0x00]
+v_add_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xda,0x00,0x00]
-v_add_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xd7,0x00,0x00]
+v_add_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xdc,0x00,0x00]
-v_add_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xd9,0x00,0x00]
+v_add_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xde,0x00,0x00]
-v_add_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xdb,0x00,0x00]
+v_add_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xf6,0x00,0x00]
-v_add_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xdd,0x00,0x00]
+v_add_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xf8,0x00,0x00]
-v_add_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xdf,0x00,0x00]
+v_add_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xfc,0x00,0x00]
-v_add_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xf7,0x00,0x00]
+v_add_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xfe,0x00,0x00]
-v_add_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xf9,0x00,0x00]
+v_add_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x00,0x01,0x00]
-v_add_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xfd,0x00,0x00]
+v_add_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x82,0x01,0x00]
-v_add_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xff,0x00,0x00]
+v_add_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xe0,0x01,0x00]
-v_add_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xfb,0x01,0x00]
+v_add_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xee,0x01,0x00]
-v_add_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0x05,0x02,0x00]
+v_add_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x04,0x02,0x00]
-v_add_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0xff,0x03,0x00]
+v_add_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0xfe,0x03,0x00]
-v_add_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0x05,0x00,0x20]
+v_add_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x04,0x00,0x20]
-v_add_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0x05,0x00,0x40]
+v_add_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x04,0x00,0x40]
-v_add_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x1f,0xd1,0x01,0x05,0x00,0x60]
+v_add_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x1f,0xd1,0x80,0x04,0x00,0x60]
-v_add_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x1f,0xd1,0x01,0x05,0x00,0x00]
+v_add_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x1f,0xd1,0x80,0x04,0x00,0x00]
-v_add_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x1f,0xd1,0x01,0x05,0x00,0x00]
+v_add_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x1f,0xd1,0x80,0x04,0x00,0x00]
-v_add_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x1f,0xd1,0x01,0x05,0x00,0x00]
+v_add_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x1f,0xd1,0x80,0x04,0x00,0x00]
-v_add_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x1f,0xd1,0x01,0x05,0x00,0x00]
+v_add_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x1f,0xd1,0x80,0x04,0x00,0x00]
v_sub_f16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x40]
@@ -37112,83 +37743,104 @@ v_sub_f16 v5, v255, v2
v_sub_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x40]
+v_sub_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x04,0x00,0x00]
+
+v_sub_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x20,0xd1,0x80,0x04,0x00,0x00]
+
+v_sub_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x20,0xd1,0xc1,0x04,0x00,0x00]
+
+v_sub_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x20,0xd1,0xf0,0x04,0x00,0x00]
+
+v_sub_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x20,0xd1,0xf7,0x04,0x00,0x00]
+
v_sub_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x20,0xd1,0x01,0x05,0x00,0x00]
-v_sub_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x20,0xd1,0x01,0x05,0x00,0x00]
-
v_sub_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x20,0xd1,0xff,0x05,0x00,0x00]
-v_sub_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xcb,0x00,0x00]
+v_sub_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xca,0x00,0x00]
+
+v_sub_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xcc,0x00,0x00]
+
+v_sub_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xce,0x00,0x00]
-v_sub_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xcd,0x00,0x00]
+v_sub_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xd4,0x00,0x00]
-v_sub_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xcf,0x00,0x00]
+v_sub_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xd6,0x00,0x00]
-v_sub_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xd5,0x00,0x00]
+v_sub_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xd8,0x00,0x00]
-v_sub_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xd7,0x00,0x00]
+v_sub_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xda,0x00,0x00]
-v_sub_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xd9,0x00,0x00]
+v_sub_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xdc,0x00,0x00]
-v_sub_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xdb,0x00,0x00]
+v_sub_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xde,0x00,0x00]
-v_sub_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xdd,0x00,0x00]
+v_sub_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xf6,0x00,0x00]
-v_sub_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xdf,0x00,0x00]
+v_sub_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xf8,0x00,0x00]
-v_sub_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xf7,0x00,0x00]
+v_sub_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xfc,0x00,0x00]
-v_sub_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xf9,0x00,0x00]
+v_sub_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xfe,0x00,0x00]
-v_sub_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xfd,0x00,0x00]
+v_sub_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x00,0x01,0x00]
-v_sub_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xff,0x00,0x00]
+v_sub_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x82,0x01,0x00]
-v_sub_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xfb,0x01,0x00]
+v_sub_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xe0,0x01,0x00]
-v_sub_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0x05,0x02,0x00]
+v_sub_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xee,0x01,0x00]
-v_sub_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0xff,0x03,0x00]
+v_sub_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x04,0x02,0x00]
-v_sub_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0x05,0x00,0x20]
+v_sub_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0xfe,0x03,0x00]
-v_sub_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0x05,0x00,0x40]
+v_sub_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x04,0x00,0x20]
-v_sub_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x20,0xd1,0x01,0x05,0x00,0x60]
+v_sub_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x04,0x00,0x40]
-v_sub_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x20,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x20,0xd1,0x80,0x04,0x00,0x60]
-v_sub_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x20,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x20,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x20,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x20,0xd1,0x80,0x04,0x00,0x00]
-v_sub_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x20,0xd1,0x01,0x05,0x00,0x00]
+v_sub_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x20,0xd1,0x80,0x04,0x00,0x00]
+
+v_sub_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x20,0xd1,0x80,0x04,0x00,0x00]
v_subrev_f16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x42]
@@ -37262,83 +37914,104 @@ v_subrev_f16 v5, v255, v2
v_subrev_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x42]
+v_subrev_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x21,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x21,0xd1,0xc1,0x04,0x00,0x00]
+
+v_subrev_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x21,0xd1,0xf0,0x04,0x00,0x00]
+
+v_subrev_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x21,0xd1,0xf7,0x04,0x00,0x00]
+
v_subrev_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x21,0xd1,0x01,0x05,0x00,0x00]
-v_subrev_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x21,0xd1,0x01,0x05,0x00,0x00]
-
v_subrev_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x21,0xd1,0xff,0x05,0x00,0x00]
-v_subrev_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xcb,0x00,0x00]
+v_subrev_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xca,0x00,0x00]
+
+v_subrev_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xcc,0x00,0x00]
+
+v_subrev_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xce,0x00,0x00]
-v_subrev_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xcd,0x00,0x00]
+v_subrev_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xd4,0x00,0x00]
-v_subrev_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xcf,0x00,0x00]
+v_subrev_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xd6,0x00,0x00]
-v_subrev_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xd5,0x00,0x00]
+v_subrev_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xd8,0x00,0x00]
-v_subrev_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xd7,0x00,0x00]
+v_subrev_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xda,0x00,0x00]
-v_subrev_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xd9,0x00,0x00]
+v_subrev_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xdc,0x00,0x00]
-v_subrev_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xdb,0x00,0x00]
+v_subrev_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xde,0x00,0x00]
-v_subrev_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xdd,0x00,0x00]
+v_subrev_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xf6,0x00,0x00]
-v_subrev_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xdf,0x00,0x00]
+v_subrev_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xf8,0x00,0x00]
-v_subrev_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xf7,0x00,0x00]
+v_subrev_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xfc,0x00,0x00]
-v_subrev_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xf9,0x00,0x00]
+v_subrev_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xfe,0x00,0x00]
-v_subrev_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xfd,0x00,0x00]
+v_subrev_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x00,0x01,0x00]
-v_subrev_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xff,0x00,0x00]
+v_subrev_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x82,0x01,0x00]
-v_subrev_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xfb,0x01,0x00]
+v_subrev_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xe0,0x01,0x00]
-v_subrev_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0x05,0x02,0x00]
+v_subrev_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xee,0x01,0x00]
-v_subrev_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0xff,0x03,0x00]
+v_subrev_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x04,0x02,0x00]
-v_subrev_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0x05,0x00,0x20]
+v_subrev_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0xfe,0x03,0x00]
-v_subrev_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0x05,0x00,0x40]
+v_subrev_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x04,0x00,0x20]
-v_subrev_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x21,0xd1,0x01,0x05,0x00,0x60]
+v_subrev_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x04,0x00,0x40]
-v_subrev_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x21,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x21,0xd1,0x80,0x04,0x00,0x60]
-v_subrev_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x21,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x21,0xd1,0x80,0x04,0x00,0x00]
-v_subrev_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x21,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x21,0xd1,0x80,0x04,0x00,0x00]
-v_subrev_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x21,0xd1,0x01,0x05,0x00,0x00]
+v_subrev_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x21,0xd1,0x80,0x04,0x00,0x00]
+
+v_subrev_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x21,0xd1,0x80,0x04,0x00,0x00]
v_mul_f16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x44]
@@ -37412,83 +38085,104 @@ v_mul_f16 v5, v255, v2
v_mul_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x44]
+v_mul_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x22,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x22,0xd1,0xc1,0x04,0x00,0x00]
+
+v_mul_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x22,0xd1,0xf0,0x04,0x00,0x00]
+
+v_mul_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x22,0xd1,0xf7,0x04,0x00,0x00]
+
v_mul_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x22,0xd1,0x01,0x05,0x00,0x00]
-v_mul_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x22,0xd1,0x01,0x05,0x00,0x00]
-
v_mul_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x22,0xd1,0xff,0x05,0x00,0x00]
-v_mul_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xcb,0x00,0x00]
+v_mul_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xca,0x00,0x00]
+
+v_mul_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xcc,0x00,0x00]
-v_mul_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xcd,0x00,0x00]
+v_mul_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xce,0x00,0x00]
-v_mul_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xcf,0x00,0x00]
+v_mul_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xd4,0x00,0x00]
-v_mul_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xd5,0x00,0x00]
+v_mul_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xd6,0x00,0x00]
-v_mul_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xd7,0x00,0x00]
+v_mul_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xd8,0x00,0x00]
-v_mul_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xd9,0x00,0x00]
+v_mul_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xda,0x00,0x00]
-v_mul_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xdb,0x00,0x00]
+v_mul_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xdc,0x00,0x00]
-v_mul_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xdd,0x00,0x00]
+v_mul_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xde,0x00,0x00]
-v_mul_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xdf,0x00,0x00]
+v_mul_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xf6,0x00,0x00]
-v_mul_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xf7,0x00,0x00]
+v_mul_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xf8,0x00,0x00]
-v_mul_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xf9,0x00,0x00]
+v_mul_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xfc,0x00,0x00]
-v_mul_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xfd,0x00,0x00]
+v_mul_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xfe,0x00,0x00]
-v_mul_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xff,0x00,0x00]
+v_mul_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x00,0x01,0x00]
-v_mul_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xfb,0x01,0x00]
+v_mul_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x82,0x01,0x00]
-v_mul_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0x05,0x02,0x00]
+v_mul_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xe0,0x01,0x00]
-v_mul_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0xff,0x03,0x00]
+v_mul_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xee,0x01,0x00]
-v_mul_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0x05,0x00,0x20]
+v_mul_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x04,0x02,0x00]
-v_mul_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0x05,0x00,0x40]
+v_mul_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0xfe,0x03,0x00]
-v_mul_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x22,0xd1,0x01,0x05,0x00,0x60]
+v_mul_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x04,0x00,0x20]
-v_mul_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x22,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x04,0x00,0x40]
-v_mul_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x22,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x22,0xd1,0x80,0x04,0x00,0x60]
-v_mul_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x22,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x22,0xd1,0x80,0x04,0x00,0x00]
-v_mul_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x22,0xd1,0x01,0x05,0x00,0x00]
+v_mul_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x22,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x22,0xd1,0x80,0x04,0x00,0x00]
+
+v_mul_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x22,0xd1,0x80,0x04,0x00,0x00]
v_mac_f16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x46]
@@ -37562,83 +38256,104 @@ v_mac_f16 v5, v255, v2
v_mac_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x46]
+v_mac_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x23,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x23,0xd1,0xc1,0x04,0x00,0x00]
+
+v_mac_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x23,0xd1,0xf0,0x04,0x00,0x00]
+
+v_mac_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x23,0xd1,0xf7,0x04,0x00,0x00]
+
v_mac_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x23,0xd1,0x01,0x05,0x00,0x00]
-v_mac_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x23,0xd1,0x01,0x05,0x00,0x00]
-
v_mac_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x23,0xd1,0xff,0x05,0x00,0x00]
-v_mac_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xcb,0x00,0x00]
+v_mac_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xca,0x00,0x00]
+
+v_mac_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xcc,0x00,0x00]
-v_mac_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xcd,0x00,0x00]
+v_mac_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xce,0x00,0x00]
-v_mac_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xcf,0x00,0x00]
+v_mac_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xd4,0x00,0x00]
-v_mac_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xd5,0x00,0x00]
+v_mac_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xd6,0x00,0x00]
-v_mac_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xd7,0x00,0x00]
+v_mac_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xd8,0x00,0x00]
-v_mac_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xd9,0x00,0x00]
+v_mac_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xda,0x00,0x00]
-v_mac_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xdb,0x00,0x00]
+v_mac_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xdc,0x00,0x00]
-v_mac_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xdd,0x00,0x00]
+v_mac_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xde,0x00,0x00]
-v_mac_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xdf,0x00,0x00]
+v_mac_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xf6,0x00,0x00]
-v_mac_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xf7,0x00,0x00]
+v_mac_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xf8,0x00,0x00]
-v_mac_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xf9,0x00,0x00]
+v_mac_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xfc,0x00,0x00]
-v_mac_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xfd,0x00,0x00]
+v_mac_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xfe,0x00,0x00]
-v_mac_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xff,0x00,0x00]
+v_mac_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x00,0x01,0x00]
-v_mac_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xfb,0x01,0x00]
+v_mac_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x82,0x01,0x00]
-v_mac_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0x05,0x02,0x00]
+v_mac_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xe0,0x01,0x00]
-v_mac_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0xff,0x03,0x00]
+v_mac_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xee,0x01,0x00]
-v_mac_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0x05,0x00,0x20]
+v_mac_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x04,0x02,0x00]
-v_mac_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0x05,0x00,0x40]
+v_mac_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0xfe,0x03,0x00]
-v_mac_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x23,0xd1,0x01,0x05,0x00,0x60]
+v_mac_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x04,0x00,0x20]
-v_mac_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x23,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x04,0x00,0x40]
-v_mac_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x23,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x23,0xd1,0x80,0x04,0x00,0x60]
-v_mac_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x23,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x23,0xd1,0x80,0x04,0x00,0x00]
-v_mac_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x23,0xd1,0x01,0x05,0x00,0x00]
+v_mac_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x23,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x23,0xd1,0x80,0x04,0x00,0x00]
+
+v_mac_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x23,0xd1,0x80,0x04,0x00,0x00]
v_madmk_f16 v5, 0, 0x1121, v3
// CHECK: [0x80,0x06,0x0a,0x48,0x21,0x11,0x00,0x00]
@@ -38816,83 +39531,104 @@ v_max_f16 v5, v255, v2
v_max_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x5a]
+v_max_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x2d,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0xc1,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0xf0,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0xf7,0x04,0x00,0x00]
+
v_max_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0x05,0x00,0x00]
-v_max_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x2d,0xd1,0x01,0x05,0x00,0x00]
-
v_max_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x2d,0xd1,0xff,0x05,0x00,0x00]
-v_max_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xcb,0x00,0x00]
+v_max_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xca,0x00,0x00]
-v_max_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xcd,0x00,0x00]
+v_max_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xcc,0x00,0x00]
-v_max_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xcf,0x00,0x00]
+v_max_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xce,0x00,0x00]
-v_max_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xd5,0x00,0x00]
+v_max_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xd4,0x00,0x00]
-v_max_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xd7,0x00,0x00]
+v_max_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xd6,0x00,0x00]
-v_max_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xd9,0x00,0x00]
+v_max_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xd8,0x00,0x00]
-v_max_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xdb,0x00,0x00]
+v_max_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xda,0x00,0x00]
-v_max_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xdd,0x00,0x00]
+v_max_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xdc,0x00,0x00]
-v_max_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xdf,0x00,0x00]
+v_max_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xde,0x00,0x00]
-v_max_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xf7,0x00,0x00]
+v_max_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xf6,0x00,0x00]
-v_max_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xf9,0x00,0x00]
+v_max_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xf8,0x00,0x00]
-v_max_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xfd,0x00,0x00]
+v_max_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xfc,0x00,0x00]
-v_max_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xff,0x00,0x00]
+v_max_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xfe,0x00,0x00]
-v_max_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xfb,0x01,0x00]
+v_max_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x00,0x01,0x00]
-v_max_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0x05,0x02,0x00]
+v_max_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x82,0x01,0x00]
-v_max_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0xff,0x03,0x00]
+v_max_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xe0,0x01,0x00]
-v_max_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0x05,0x00,0x20]
+v_max_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xee,0x01,0x00]
-v_max_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0x05,0x00,0x40]
+v_max_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x04,0x02,0x00]
-v_max_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x2d,0xd1,0x01,0x05,0x00,0x60]
+v_max_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0xfe,0x03,0x00]
-v_max_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x2d,0xd1,0x01,0x05,0x00,0x00]
+v_max_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x04,0x00,0x20]
-v_max_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x2d,0xd1,0x01,0x05,0x00,0x00]
+v_max_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x04,0x00,0x40]
-v_max_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x2d,0xd1,0x01,0x05,0x00,0x00]
+v_max_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x2d,0xd1,0x80,0x04,0x00,0x60]
-v_max_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x2d,0xd1,0x01,0x05,0x00,0x00]
+v_max_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x2d,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x2d,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x2d,0xd1,0x80,0x04,0x00,0x00]
+
+v_max_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x2d,0xd1,0x80,0x04,0x00,0x00]
v_min_f16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x5c]
@@ -38966,83 +39702,104 @@ v_min_f16 v5, v255, v2
v_min_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x5c]
+v_min_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x2e,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0xc1,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0xf0,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0xf7,0x04,0x00,0x00]
+
v_min_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0x05,0x00,0x00]
-v_min_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x2e,0xd1,0x01,0x05,0x00,0x00]
-
v_min_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x2e,0xd1,0xff,0x05,0x00,0x00]
-v_min_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xcb,0x00,0x00]
+v_min_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xca,0x00,0x00]
-v_min_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xcd,0x00,0x00]
+v_min_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xcc,0x00,0x00]
-v_min_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xcf,0x00,0x00]
+v_min_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xce,0x00,0x00]
-v_min_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xd5,0x00,0x00]
+v_min_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xd4,0x00,0x00]
-v_min_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xd7,0x00,0x00]
+v_min_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xd6,0x00,0x00]
-v_min_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xd9,0x00,0x00]
+v_min_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xd8,0x00,0x00]
-v_min_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xdb,0x00,0x00]
+v_min_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xda,0x00,0x00]
-v_min_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xdd,0x00,0x00]
+v_min_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xdc,0x00,0x00]
-v_min_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xdf,0x00,0x00]
+v_min_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xde,0x00,0x00]
-v_min_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xf7,0x00,0x00]
+v_min_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xf6,0x00,0x00]
-v_min_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xf9,0x00,0x00]
+v_min_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xf8,0x00,0x00]
-v_min_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xfd,0x00,0x00]
+v_min_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xfc,0x00,0x00]
-v_min_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xff,0x00,0x00]
+v_min_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xfe,0x00,0x00]
-v_min_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xfb,0x01,0x00]
+v_min_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x00,0x01,0x00]
-v_min_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0x05,0x02,0x00]
+v_min_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x82,0x01,0x00]
-v_min_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0xff,0x03,0x00]
+v_min_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xe0,0x01,0x00]
-v_min_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0x05,0x00,0x20]
+v_min_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xee,0x01,0x00]
-v_min_f16_e64 v5, v1, -s2
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0x05,0x00,0x40]
+v_min_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x04,0x02,0x00]
-v_min_f16_e64 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x2e,0xd1,0x01,0x05,0x00,0x60]
+v_min_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0xfe,0x03,0x00]
-v_min_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x2e,0xd1,0x01,0x05,0x00,0x00]
+v_min_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x04,0x00,0x20]
-v_min_f16_e64 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x2e,0xd1,0x01,0x05,0x00,0x00]
+v_min_f16_e64 v5, 0, -s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x04,0x00,0x40]
-v_min_f16_e64 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x2e,0xd1,0x01,0x05,0x00,0x00]
+v_min_f16_e64 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x2e,0xd1,0x80,0x04,0x00,0x60]
-v_min_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x2e,0xd1,0x01,0x05,0x00,0x00]
+v_min_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x2e,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x2e,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x2e,0xd1,0x80,0x04,0x00,0x00]
+
+v_min_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x2e,0xd1,0x80,0x04,0x00,0x00]
v_max_u16 v5, s1, v2
// CHECK: [0x01,0x04,0x0a,0x5e]
@@ -39716,275 +40473,350 @@ v_ldexp_f16 v5, v255, v2
v_ldexp_f16 v5, s1, v255
// CHECK: [0x01,0xfe,0x0b,0x66]
+v_ldexp_f16_e64 v5, 0, s2
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0x04,0x00,0x00]
+
+v_ldexp_f16_e64 v255, 0, s2
+// CHECK: [0xff,0x00,0x33,0xd1,0x80,0x04,0x00,0x00]
+
+v_ldexp_f16_e64 v5, -1, s2
+// CHECK: [0x05,0x00,0x33,0xd1,0xc1,0x04,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x33,0xd1,0xf0,0x04,0x00,0x00]
+
+v_ldexp_f16_e64 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x33,0xd1,0xf7,0x04,0x00,0x00]
+
v_ldexp_f16_e64 v5, v1, s2
// CHECK: [0x05,0x00,0x33,0xd1,0x01,0x05,0x00,0x00]
-v_ldexp_f16_e64 v255, v1, s2
-// CHECK: [0xff,0x00,0x33,0xd1,0x01,0x05,0x00,0x00]
-
v_ldexp_f16_e64 v5, v255, s2
// CHECK: [0x05,0x00,0x33,0xd1,0xff,0x05,0x00,0x00]
-v_ldexp_f16_e64 v5, v1, s101
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xcb,0x00,0x00]
+v_ldexp_f16_e64 v5, 0, s101
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xca,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xcc,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xce,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xd4,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xd6,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xd8,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xda,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xdc,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xde,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xf6,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, m0
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xf8,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xfc,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xfe,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, 0
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0x00,0x01,0x00]
+
+v_ldexp_f16_e64 v5, 0, -1
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0x82,0x01,0x00]
+
+v_ldexp_f16_e64 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xe0,0x01,0x00]
+
+v_ldexp_f16_e64 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xee,0x01,0x00]
+
+v_ldexp_f16_e64 v5, 0, v2
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0x04,0x02,0x00]
+
+v_ldexp_f16_e64 v5, 0, v255
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0xfe,0x03,0x00]
+
+v_ldexp_f16_e64 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x33,0xd1,0x80,0x04,0x00,0x20]
+
+v_ldexp_f16_e64 v5, |0|, s2
+// CHECK: [0x05,0x01,0x33,0xd1,0x80,0x04,0x00,0x00]
+
+v_ldexp_f16_e64 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x33,0xd1,0x80,0x04,0x00,0x00]
-v_ldexp_f16_e64 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xcd,0x00,0x00]
+v_mad_legacy_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xcf,0x00,0x00]
+v_mad_legacy_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xd5,0x00,0x00]
+v_mad_legacy_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x65,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xd7,0x00,0x00]
+v_mad_legacy_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x66,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xd9,0x00,0x00]
+v_mad_legacy_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x67,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xdb,0x00,0x00]
+v_mad_legacy_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6a,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xdd,0x00,0x00]
+v_mad_legacy_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6b,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xdf,0x00,0x00]
+v_mad_legacy_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6c,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x00,0x00]
+v_mad_legacy_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6d,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, m0
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf9,0x00,0x00]
+v_mad_legacy_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6e,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xfd,0x00,0x00]
+v_mad_legacy_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x6f,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xff,0x00,0x00]
+v_mad_legacy_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x7b,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, 0
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0x01,0x01,0x00]
+v_mad_legacy_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x7c,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, -1
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
+v_mad_legacy_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x7e,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, 0.5
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
+v_mad_legacy_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x7f,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, -4.0
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
+v_mad_legacy_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x80,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, scc
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xfb,0x01,0x00]
+v_mad_legacy_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0xc1,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, v2
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x00]
+v_mad_legacy_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0xf0,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, v255
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xff,0x03,0x00]
+v_mad_legacy_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0xf7,0x00,0x01,0x02]
-v_ldexp_f16_e64 v5, -v1, s2
-// CHECK: [0x05,0x00,0x33,0xd1,0x01,0x05,0x00,0x20]
+v_mad_legacy_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x01,0x01,0x02]
-v_ldexp_f16_e64 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x33,0xd1,0x01,0x05,0x00,0x00]
+v_mad_legacy_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0xff,0x01,0x01,0x02]
-v_ldexp_f16_e64 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x33,0xd1,0x01,0x05,0x00,0x00]
+v_mad_legacy_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x82,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0xe0,0x01,0x02]
-v_mad_legacy_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0xee,0x01,0x02]
-v_mad_legacy_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x65,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x02,0x02]
-v_mad_legacy_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x66,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0xfe,0x03,0x02]
-v_mad_legacy_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x67,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x05,0x03]
-v_mad_legacy_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6a,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0xc1,0x03]
-v_mad_legacy_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6b,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0xdd,0x03]
-v_mad_legacy_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6c,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x0d,0x04]
-v_mad_legacy_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6d,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0xfd,0x07]
-v_mad_legacy_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6e,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x22]
-v_mad_legacy_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x6f,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x42]
-v_mad_legacy_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x7b,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x82]
-v_mad_legacy_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x7c,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0xe2]
-v_mad_legacy_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x7e,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x7f,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0xfd,0x04,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x05,0x0e,0x04]
+v_mad_legacy_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0xff,0x05,0x0e,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc0,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0xfe,0x0f,0x04]
+v_mad_legacy_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x0a]
-v_mad_legacy_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0xfe,0x07]
+v_mad_legacy_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x12]
-v_mad_legacy_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x24]
+v_mad_legacy_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x00,0x01,0x1a]
-v_mad_legacy_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x44]
+v_mad_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x84]
+v_mad_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0xe4]
+v_mad_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x65,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x66,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x67,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6a,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6b,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc0,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6c,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x0c]
+v_mad_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6d,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x14]
+v_mad_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6e,0x00,0x01,0x02]
-v_mad_legacy_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc0,0xd1,0x01,0x04,0x0e,0x1c]
+v_mad_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x6f,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x7b,0x00,0x01,0x02]
-v_mad_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x7c,0x00,0x01,0x02]
-v_mad_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x65,0x04,0x0e,0x04]
+v_mad_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x7e,0x00,0x01,0x02]
-v_mad_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x66,0x04,0x0e,0x04]
+v_mad_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x7f,0x00,0x01,0x02]
-v_mad_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x67,0x04,0x0e,0x04]
+v_mad_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x80,0x00,0x01,0x02]
-v_mad_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6a,0x04,0x0e,0x04]
+v_mad_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0xc1,0x00,0x01,0x02]
-v_mad_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6b,0x04,0x0e,0x04]
+v_mad_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0xf0,0x00,0x01,0x02]
-v_mad_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6c,0x04,0x0e,0x04]
+v_mad_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0xf7,0x00,0x01,0x02]
-v_mad_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6d,0x04,0x0e,0x04]
+v_mad_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x01,0x01,0x02]
-v_mad_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6e,0x04,0x0e,0x04]
+v_mad_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0xff,0x01,0x01,0x02]
-v_mad_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x6f,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x82,0x01,0x02]
-v_mad_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x7b,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0xe0,0x01,0x02]
-v_mad_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x7c,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0xee,0x01,0x02]
-v_mad_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x7e,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x02,0x02]
-v_mad_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x7f,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0xfe,0x03,0x02]
-v_mad_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0xfd,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x05,0x03]
-v_mad_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x05,0x0e,0x04]
+v_mad_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0xc1,0x03]
-v_mad_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0xff,0x05,0x0e,0x04]
+v_mad_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0xdd,0x03]
-v_mad_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0xfe,0x0f,0x04]
+v_mad_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x0d,0x04]
-v_mad_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0xfe,0x07]
+v_mad_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0xfd,0x07]
-v_mad_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x24]
+v_mad_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x22]
-v_mad_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x44]
+v_mad_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x42]
-v_mad_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x84]
+v_mad_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x82]
-v_mad_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0xe4]
+v_mad_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0xe2]
-v_mad_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc1,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc1,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x0c]
+v_mad_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x0a]
-v_mad_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x14]
+v_mad_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x12]
-v_mad_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x04,0x0e,0x1c]
+v_mad_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc1,0xd1,0x01,0x00,0x01,0x1a]
v_mad_i32_i24 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xc2,0xd1,0x01,0x00,0x01,0x02]
@@ -40172,389 +41004,521 @@ v_mad_u32_u24 v5, s1, 0, v3
v_mad_u32_u24 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xc3,0xd1,0x01,0x00,0xfd,0x07]
-v_cubeid_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubeid_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc4,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x65,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x66,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x67,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6a,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6b,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6c,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6d,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6e,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x6f,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x7b,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x7c,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x7e,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x7f,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x80,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0xc1,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0xf0,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0xf7,0x00,0x01,0x02]
+
+v_cubeid_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x01,0x01,0x02]
+
+v_cubeid_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0xff,0x01,0x01,0x02]
+
+v_cubeid_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x82,0x01,0x02]
+
+v_cubeid_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0xe0,0x01,0x02]
+
+v_cubeid_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0xee,0x01,0x02]
+
+v_cubeid_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x02,0x02]
+
+v_cubeid_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0xfe,0x03,0x02]
+
+v_cubeid_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x05,0x03]
+
+v_cubeid_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0xc1,0x03]
+
+v_cubeid_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0xdd,0x03]
+
+v_cubeid_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x0d,0x04]
+
+v_cubeid_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0xfd,0x07]
+
+v_cubeid_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x22]
+
+v_cubeid_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x42]
+
+v_cubeid_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x82]
+
+v_cubeid_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0xe2]
-v_cubeid_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubeid_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc4,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x65,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc4,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x66,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc4,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x67,0x04,0x0e,0x04]
+v_cubeid_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc4,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6a,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc4,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6b,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x0a]
-v_cubeid_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6c,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x12]
-v_cubeid_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6d,0x04,0x0e,0x04]
+v_cubeid_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x00,0x01,0x1a]
-v_cubeid_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x6f,0x04,0x0e,0x04]
+v_cubesc_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubeid_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x7b,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x65,0x00,0x01,0x02]
-v_cubeid_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x7c,0x04,0x0e,0x04]
+v_cubesc_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x66,0x00,0x01,0x02]
-v_cubeid_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x7e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x67,0x00,0x01,0x02]
-v_cubeid_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x7f,0x04,0x0e,0x04]
+v_cubesc_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6a,0x00,0x01,0x02]
-v_cubeid_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0xfd,0x04,0x0e,0x04]
+v_cubesc_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6b,0x00,0x01,0x02]
-v_cubeid_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x05,0x0e,0x04]
+v_cubesc_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6c,0x00,0x01,0x02]
-v_cubeid_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0xff,0x05,0x0e,0x04]
+v_cubesc_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6d,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0xfe,0x0f,0x04]
+v_cubesc_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6e,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0xfe,0x07]
+v_cubesc_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x6f,0x00,0x01,0x02]
-v_cubeid_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x24]
+v_cubesc_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x7b,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x44]
+v_cubesc_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x7c,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x84]
+v_cubesc_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x7e,0x00,0x01,0x02]
-v_cubeid_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0xe4]
+v_cubesc_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x7f,0x00,0x01,0x02]
-v_cubeid_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x80,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0xc1,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0xf0,0x00,0x01,0x02]
-v_cubeid_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0xf7,0x00,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc4,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x01,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x0c]
+v_cubesc_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0xff,0x01,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x14]
+v_cubesc_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x82,0x01,0x02]
-v_cubeid_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc4,0xd1,0x01,0x04,0x0e,0x1c]
+v_cubesc_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0xe0,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0xee,0x01,0x02]
-v_cubesc_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x02,0x02]
-v_cubesc_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x65,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0xfe,0x03,0x02]
-v_cubesc_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x66,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x05,0x03]
-v_cubesc_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x67,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0xc1,0x03]
-v_cubesc_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6a,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0xdd,0x03]
-v_cubesc_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6b,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x0d,0x04]
-v_cubesc_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6c,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0xfd,0x07]
-v_cubesc_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6d,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x22]
-v_cubesc_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x42]
-v_cubesc_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x6f,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x82]
-v_cubesc_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x7b,0x04,0x0e,0x04]
+v_cubesc_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0xe2]
-v_cubesc_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x7c,0x04,0x0e,0x04]
+v_cubesc_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x7e,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x7f,0x04,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0xfd,0x04,0x0e,0x04]
+v_cubesc_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x05,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc5,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0xff,0x05,0x0e,0x04]
+v_cubesc_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x0a]
-v_cubesc_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0xfe,0x0f,0x04]
+v_cubesc_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x12]
-v_cubesc_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0xfe,0x07]
+v_cubesc_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x00,0x01,0x1a]
-v_cubesc_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x24]
+v_cubetc_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x44]
+v_cubetc_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x84]
+v_cubetc_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x65,0x00,0x01,0x02]
-v_cubesc_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0xe4]
+v_cubetc_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x66,0x00,0x01,0x02]
-v_cubesc_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x67,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6a,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6b,0x00,0x01,0x02]
-v_cubesc_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6c,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc5,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6d,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x0c]
+v_cubetc_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6e,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x14]
+v_cubetc_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x6f,0x00,0x01,0x02]
-v_cubesc_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc5,0xd1,0x01,0x04,0x0e,0x1c]
+v_cubetc_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x7b,0x00,0x01,0x02]
-v_cubetc_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x7c,0x00,0x01,0x02]
-v_cubetc_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x7e,0x00,0x01,0x02]
-v_cubetc_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x65,0x04,0x0e,0x04]
+v_cubetc_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x7f,0x00,0x01,0x02]
-v_cubetc_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x66,0x04,0x0e,0x04]
+v_cubetc_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x80,0x00,0x01,0x02]
-v_cubetc_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x67,0x04,0x0e,0x04]
+v_cubetc_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0xc1,0x00,0x01,0x02]
-v_cubetc_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6a,0x04,0x0e,0x04]
+v_cubetc_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0xf0,0x00,0x01,0x02]
-v_cubetc_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6b,0x04,0x0e,0x04]
+v_cubetc_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0xf7,0x00,0x01,0x02]
-v_cubetc_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6c,0x04,0x0e,0x04]
+v_cubetc_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x01,0x01,0x02]
-v_cubetc_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6d,0x04,0x0e,0x04]
+v_cubetc_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0xff,0x01,0x01,0x02]
-v_cubetc_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6e,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x82,0x01,0x02]
-v_cubetc_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x6f,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0xe0,0x01,0x02]
-v_cubetc_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x7b,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0xee,0x01,0x02]
-v_cubetc_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x7c,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x02,0x02]
-v_cubetc_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x7e,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0xfe,0x03,0x02]
-v_cubetc_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x7f,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x05,0x03]
-v_cubetc_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0xfd,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0xc1,0x03]
-v_cubetc_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x05,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0xdd,0x03]
-v_cubetc_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0xff,0x05,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x0d,0x04]
-v_cubetc_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0xfe,0x0f,0x04]
+v_cubetc_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0xfd,0x07]
-v_cubetc_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0xfe,0x07]
+v_cubetc_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x22]
-v_cubetc_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x24]
+v_cubetc_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x42]
-v_cubetc_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x44]
+v_cubetc_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x82]
-v_cubetc_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x84]
+v_cubetc_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0xe2]
-v_cubetc_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0xe4]
+v_cubetc_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubetc_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubetc_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubetc_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubetc_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc6,0xd1,0x01,0x00,0x01,0x02]
-v_cubetc_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc6,0xd1,0x01,0x04,0x0e,0x04]
+v_cubetc_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x0a]
-v_cubetc_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x0c]
+v_cubetc_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x12]
-v_cubetc_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x14]
+v_cubetc_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x00,0x01,0x1a]
-v_cubetc_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc6,0xd1,0x01,0x04,0x0e,0x1c]
+v_cubema_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xc7,0xd1,0x01,0x00,0x01,0x02]
-v_cubema_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x65,0x00,0x01,0x02]
-v_cubema_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x65,0x04,0x0e,0x04]
+v_cubema_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x66,0x00,0x01,0x02]
-v_cubema_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x66,0x04,0x0e,0x04]
+v_cubema_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x67,0x00,0x01,0x02]
-v_cubema_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x67,0x04,0x0e,0x04]
+v_cubema_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6a,0x00,0x01,0x02]
-v_cubema_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6a,0x04,0x0e,0x04]
+v_cubema_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6b,0x00,0x01,0x02]
-v_cubema_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6b,0x04,0x0e,0x04]
+v_cubema_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6c,0x00,0x01,0x02]
-v_cubema_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6c,0x04,0x0e,0x04]
+v_cubema_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6d,0x00,0x01,0x02]
-v_cubema_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6d,0x04,0x0e,0x04]
+v_cubema_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6e,0x00,0x01,0x02]
-v_cubema_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6e,0x04,0x0e,0x04]
+v_cubema_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x6f,0x00,0x01,0x02]
-v_cubema_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x6f,0x04,0x0e,0x04]
+v_cubema_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x7b,0x00,0x01,0x02]
-v_cubema_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x7b,0x04,0x0e,0x04]
+v_cubema_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x7c,0x00,0x01,0x02]
-v_cubema_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x7c,0x04,0x0e,0x04]
+v_cubema_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x7e,0x00,0x01,0x02]
-v_cubema_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x7e,0x04,0x0e,0x04]
+v_cubema_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x7f,0x00,0x01,0x02]
-v_cubema_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x7f,0x04,0x0e,0x04]
+v_cubema_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x80,0x00,0x01,0x02]
-v_cubema_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0xfd,0x04,0x0e,0x04]
+v_cubema_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0xc1,0x00,0x01,0x02]
-v_cubema_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x05,0x0e,0x04]
+v_cubema_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0xf0,0x00,0x01,0x02]
-v_cubema_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0xff,0x05,0x0e,0x04]
+v_cubema_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0xf7,0x00,0x01,0x02]
-v_cubema_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0xfe,0x0f,0x04]
+v_cubema_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x01,0x01,0x02]
-v_cubema_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0xfe,0x07]
+v_cubema_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0xff,0x01,0x01,0x02]
-v_cubema_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x24]
+v_cubema_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x82,0x01,0x02]
-v_cubema_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x44]
+v_cubema_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0xe0,0x01,0x02]
-v_cubema_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x84]
+v_cubema_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0xee,0x01,0x02]
-v_cubema_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0xe4]
+v_cubema_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x02,0x02]
-v_cubema_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0xfe,0x03,0x02]
-v_cubema_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x05,0x03]
-v_cubema_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0xc1,0x03]
-v_cubema_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0xdd,0x03]
-v_cubema_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xc7,0xd1,0x01,0x04,0x0e,0x04]
+v_cubema_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x0d,0x04]
-v_cubema_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x0c]
+v_cubema_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0xfd,0x07]
-v_cubema_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x14]
+v_cubema_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x22]
-v_cubema_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x04,0x0e,0x1c]
+v_cubema_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x42]
+
+v_cubema_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x82]
+
+v_cubema_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0xe2]
+
+v_cubema_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xc7,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xc7,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xc7,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xc7,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xc7,0xd1,0x01,0x00,0x01,0x02]
+
+v_cubema_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x0a]
+
+v_cubema_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x12]
+
+v_cubema_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xc7,0xd1,0x01,0x00,0x01,0x1a]
v_bfe_u32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xc8,0xd1,0x01,0x00,0x01,0x02]
@@ -40835,182 +41799,248 @@ v_bfi_b32 v5, s1, 0, v3
v_bfi_b32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xca,0xd1,0x01,0x00,0xfd,0x07]
-v_fma_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x02]
+
+v_fma_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xcb,0xd1,0x01,0x00,0x01,0x02]
+
+v_fma_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x65,0x00,0x01,0x02]
+
+v_fma_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x66,0x00,0x01,0x02]
+
+v_fma_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x67,0x00,0x01,0x02]
+
+v_fma_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6a,0x00,0x01,0x02]
+
+v_fma_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6b,0x00,0x01,0x02]
+
+v_fma_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6c,0x00,0x01,0x02]
+
+v_fma_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6d,0x00,0x01,0x02]
-v_fma_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6e,0x00,0x01,0x02]
-v_fma_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x65,0x04,0x0e,0x04]
+v_fma_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x6f,0x00,0x01,0x02]
-v_fma_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x66,0x04,0x0e,0x04]
+v_fma_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x7b,0x00,0x01,0x02]
-v_fma_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x67,0x04,0x0e,0x04]
+v_fma_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x7c,0x00,0x01,0x02]
-v_fma_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6a,0x04,0x0e,0x04]
+v_fma_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x7e,0x00,0x01,0x02]
-v_fma_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6b,0x04,0x0e,0x04]
+v_fma_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x7f,0x00,0x01,0x02]
-v_fma_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6c,0x04,0x0e,0x04]
+v_fma_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x80,0x00,0x01,0x02]
-v_fma_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6d,0x04,0x0e,0x04]
+v_fma_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0xc1,0x00,0x01,0x02]
-v_fma_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6e,0x04,0x0e,0x04]
+v_fma_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0xf0,0x00,0x01,0x02]
-v_fma_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x6f,0x04,0x0e,0x04]
+v_fma_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0xf7,0x00,0x01,0x02]
-v_fma_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x7b,0x04,0x0e,0x04]
+v_fma_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x01,0x01,0x02]
-v_fma_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x7c,0x04,0x0e,0x04]
+v_fma_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0xff,0x01,0x01,0x02]
-v_fma_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x7e,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x82,0x01,0x02]
-v_fma_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x7f,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0xe0,0x01,0x02]
-v_fma_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0xfd,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0xee,0x01,0x02]
-v_fma_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x05,0x0e,0x04]
+v_fma_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x02,0x02]
-v_fma_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0xff,0x05,0x0e,0x04]
+v_fma_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0xfe,0x03,0x02]
-v_fma_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0xfe,0x0f,0x04]
+v_fma_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x05,0x03]
-v_fma_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0xfe,0x07]
+v_fma_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0xc1,0x03]
-v_fma_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x24]
+v_fma_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0xdd,0x03]
-v_fma_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x44]
+v_fma_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x0d,0x04]
-v_fma_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x84]
+v_fma_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0xfd,0x07]
-v_fma_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0xe4]
+v_fma_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x22]
-v_fma_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x42]
-v_fma_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x82]
-v_fma_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0xe2]
-v_fma_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xcb,0xd1,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xcb,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xcb,0xd1,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x0c]
+v_fma_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xcb,0xd1,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x14]
+v_fma_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xcb,0xd1,0x01,0x00,0x01,0x02]
-v_fma_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x04,0x0e,0x1c]
+v_fma_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xcb,0xd1,0x01,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x0a]
-v_fma_f64 v[254:255], s[2:3], v[2:3], v[3:4]
-// CHECK: [0xfe,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x12]
-v_fma_f64 v[5:6], s[4:5], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x04,0x04,0x0e,0x04]
+v_fma_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xcb,0xd1,0x01,0x00,0x01,0x1a]
-v_fma_f64 v[5:6], s[100:101], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x64,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x02]
-v_fma_f64 v[5:6], flat_scratch, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x66,0x04,0x0e,0x04]
+v_fma_f64 v[254:255], s[2:3], 0, 0
+// CHECK: [0xfe,0x00,0xcc,0xd1,0x02,0x00,0x01,0x02]
-v_fma_f64 v[5:6], vcc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x6a,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[4:5], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x04,0x00,0x01,0x02]
-v_fma_f64 v[5:6], tba, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x6c,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[100:101], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x64,0x00,0x01,0x02]
-v_fma_f64 v[5:6], tma, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x6e,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], flat_scratch, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x66,0x00,0x01,0x02]
-v_fma_f64 v[5:6], ttmp[10:11], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x7a,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], vcc, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x6a,0x00,0x01,0x02]
-v_fma_f64 v[5:6], exec, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x7e,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], tba, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x6c,0x00,0x01,0x02]
-v_fma_f64 v[5:6], scc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0xfd,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], tma, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x6e,0x00,0x01,0x02]
-v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x01,0x05,0x0e,0x04]
+v_fma_f64 v[5:6], ttmp[10:11], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x7a,0x00,0x01,0x02]
-v_fma_f64 v[5:6], v[254:255], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0xfe,0x05,0x0e,0x04]
+v_fma_f64 v[5:6], exec, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x7e,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[254:255], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0xfc,0x0f,0x04]
+v_fma_f64 v[5:6], 0, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x80,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[254:255]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0xfa,0x07]
+v_fma_f64 v[5:6], -1, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0xc1,0x00,0x01,0x02]
-v_fma_f64 v[5:6], -s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x24]
+v_fma_f64 v[5:6], 0.5, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0xf0,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], -v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x44]
+v_fma_f64 v[5:6], -4.0, 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0xf7,0x00,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x84]
+v_fma_f64 v[5:6], v[1:2], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x01,0x01,0x01,0x02]
-v_fma_f64 v[5:6], -s[2:3], -v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0xe4]
+v_fma_f64 v[5:6], v[254:255], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0xfe,0x01,0x01,0x02]
-v_fma_f64 v[5:6], |s[2:3]|, v[2:3], v[3:4]
-// CHECK: [0x05,0x01,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], -1, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x82,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], |v[2:3]|, v[3:4]
-// CHECK: [0x05,0x02,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], 0.5, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0xe0,0x01,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], |v[3:4]|
-// CHECK: [0x05,0x04,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], -4.0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0xee,0x01,0x02]
-v_fma_f64 v[5:6], |s[2:3]|, |v[2:3]|, |v[3:4]|
-// CHECK: [0x05,0x07,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], v[2:3], 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x02,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] clamp
-// CHECK: [0x05,0x80,0xcc,0xd1,0x02,0x04,0x0e,0x04]
+v_fma_f64 v[5:6], s[2:3], v[254:255], 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0xfc,0x03,0x02]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:2
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x0c]
+v_fma_f64 v[5:6], s[2:3], 0, -1
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x05,0x03]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:4
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x14]
+v_fma_f64 v[5:6], s[2:3], 0, 0.5
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0xc1,0x03]
-v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] div:2
-// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x04,0x0e,0x1c]
+v_fma_f64 v[5:6], s[2:3], 0, -4.0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0xdd,0x03]
+
+v_fma_f64 v[5:6], s[2:3], 0, v[3:4]
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x0d,0x04]
+
+v_fma_f64 v[5:6], s[2:3], 0, v[254:255]
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0xf9,0x07]
+
+v_fma_f64 v[5:6], -s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x22]
+
+v_fma_f64 v[5:6], s[2:3], neg(0), 0
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x42]
+
+v_fma_f64 v[5:6], s[2:3], 0, neg(0)
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x82]
+
+v_fma_f64 v[5:6], -s[2:3], neg(0), neg(0)
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0xe2]
+
+v_fma_f64 v[5:6], |s[2:3]|, 0, 0
+// CHECK: [0x05,0x01,0xcc,0xd1,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], |0|, 0
+// CHECK: [0x05,0x02,0xcc,0xd1,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], 0, |0|
+// CHECK: [0x05,0x04,0xcc,0xd1,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], |s[2:3]|, |0|, |0|
+// CHECK: [0x05,0x07,0xcc,0xd1,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 clamp
+// CHECK: [0x05,0x80,0xcc,0xd1,0x02,0x00,0x01,0x02]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 mul:2
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x0a]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 mul:4
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x12]
+
+v_fma_f64 v[5:6], s[2:3], 0, 0 div:2
+// CHECK: [0x05,0x00,0xcc,0xd1,0x02,0x00,0x01,0x1a]
v_lerp_u8 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xcd,0xd1,0x01,0x00,0x01,0x02]
@@ -41291,101 +42321,134 @@ v_alignbyte_b32 v5, s1, 0, v3
v_alignbyte_b32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x00,0xfd,0x07]
-v_min3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x65,0x00,0x01,0x02]
+
+v_min3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x66,0x00,0x01,0x02]
-v_min3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x67,0x00,0x01,0x02]
-v_min3_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x65,0x04,0x0e,0x04]
+v_min3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6a,0x00,0x01,0x02]
-v_min3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x66,0x04,0x0e,0x04]
+v_min3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6b,0x00,0x01,0x02]
-v_min3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x67,0x04,0x0e,0x04]
+v_min3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6c,0x00,0x01,0x02]
-v_min3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6a,0x04,0x0e,0x04]
+v_min3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6d,0x00,0x01,0x02]
-v_min3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6b,0x04,0x0e,0x04]
+v_min3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6e,0x00,0x01,0x02]
-v_min3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6c,0x04,0x0e,0x04]
+v_min3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x6f,0x00,0x01,0x02]
-v_min3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6d,0x04,0x0e,0x04]
+v_min3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x7b,0x00,0x01,0x02]
-v_min3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6e,0x04,0x0e,0x04]
+v_min3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x7c,0x00,0x01,0x02]
-v_min3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x6f,0x04,0x0e,0x04]
+v_min3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x7e,0x00,0x01,0x02]
-v_min3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x7b,0x04,0x0e,0x04]
+v_min3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x7f,0x00,0x01,0x02]
-v_min3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x7c,0x04,0x0e,0x04]
+v_min3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x80,0x00,0x01,0x02]
-v_min3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x7e,0x04,0x0e,0x04]
+v_min3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0xc1,0x00,0x01,0x02]
-v_min3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x7f,0x04,0x0e,0x04]
+v_min3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0xf0,0x00,0x01,0x02]
-v_min3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0xfd,0x04,0x0e,0x04]
+v_min3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0xf7,0x00,0x01,0x02]
-v_min3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x05,0x0e,0x04]
+v_min3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x01,0x01,0x02]
-v_min3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0xff,0x05,0x0e,0x04]
+v_min3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0xff,0x01,0x01,0x02]
-v_min3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0xfe,0x0f,0x04]
+v_min3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x82,0x01,0x02]
-v_min3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0xfe,0x07]
+v_min3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0xe0,0x01,0x02]
-v_min3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x24]
+v_min3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0xee,0x01,0x02]
-v_min3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x44]
+v_min3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x02,0x02]
-v_min3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x84]
+v_min3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0xfe,0x03,0x02]
-v_min3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0xe4]
+v_min3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x05,0x03]
-v_min3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0xc1,0x03]
-v_min3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0xdd,0x03]
-v_min3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x0d,0x04]
-v_min3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0xfd,0x07]
-v_min3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xd0,0xd1,0x01,0x04,0x0e,0x04]
+v_min3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x22]
-v_min3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x0c]
+v_min3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x42]
-v_min3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x14]
+v_min3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x82]
-v_min3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x04,0x0e,0x1c]
+v_min3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0xe2]
+
+v_min3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xd0,0xd1,0x01,0x00,0x01,0x02]
+
+v_min3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x0a]
+
+v_min3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x12]
+
+v_min3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x00,0x01,0x1a]
v_min3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xd1,0xd1,0x01,0x00,0x01,0x02]
@@ -41573,101 +42636,134 @@ v_min3_u32 v5, s1, 0, v3
v_min3_u32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xd2,0xd1,0x01,0x00,0xfd,0x07]
-v_max3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x02]
+
+v_max3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xd3,0xd1,0x01,0x00,0x01,0x02]
+
+v_max3_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x65,0x00,0x01,0x02]
+
+v_max3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x66,0x00,0x01,0x02]
+
+v_max3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x67,0x00,0x01,0x02]
+
+v_max3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6a,0x00,0x01,0x02]
+
+v_max3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6b,0x00,0x01,0x02]
+
+v_max3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6c,0x00,0x01,0x02]
+
+v_max3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6d,0x00,0x01,0x02]
+
+v_max3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6e,0x00,0x01,0x02]
-v_max3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x6f,0x00,0x01,0x02]
-v_max3_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x65,0x04,0x0e,0x04]
+v_max3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x7b,0x00,0x01,0x02]
-v_max3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x66,0x04,0x0e,0x04]
+v_max3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x7c,0x00,0x01,0x02]
-v_max3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x67,0x04,0x0e,0x04]
+v_max3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x7e,0x00,0x01,0x02]
-v_max3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6a,0x04,0x0e,0x04]
+v_max3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x7f,0x00,0x01,0x02]
-v_max3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6b,0x04,0x0e,0x04]
+v_max3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x80,0x00,0x01,0x02]
-v_max3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6c,0x04,0x0e,0x04]
+v_max3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0xc1,0x00,0x01,0x02]
-v_max3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6d,0x04,0x0e,0x04]
+v_max3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0xf0,0x00,0x01,0x02]
-v_max3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6e,0x04,0x0e,0x04]
+v_max3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0xf7,0x00,0x01,0x02]
-v_max3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x6f,0x04,0x0e,0x04]
+v_max3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x01,0x01,0x02]
-v_max3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x7b,0x04,0x0e,0x04]
+v_max3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0xff,0x01,0x01,0x02]
-v_max3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x7c,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x82,0x01,0x02]
-v_max3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x7e,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0xe0,0x01,0x02]
-v_max3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x7f,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0xee,0x01,0x02]
-v_max3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0xfd,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x02,0x02]
-v_max3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x05,0x0e,0x04]
+v_max3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0xfe,0x03,0x02]
-v_max3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0xff,0x05,0x0e,0x04]
+v_max3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x05,0x03]
-v_max3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0xfe,0x0f,0x04]
+v_max3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0xc1,0x03]
-v_max3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0xfe,0x07]
+v_max3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0xdd,0x03]
-v_max3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x24]
+v_max3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x0d,0x04]
-v_max3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x44]
+v_max3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0xfd,0x07]
-v_max3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x84]
+v_max3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x22]
-v_max3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0xe4]
+v_max3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x42]
-v_max3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x82]
-v_max3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0xe2]
-v_max3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xd3,0xd1,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xd3,0xd1,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xd3,0xd1,0x01,0x04,0x0e,0x04]
+v_max3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xd3,0xd1,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x0c]
+v_max3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xd3,0xd1,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x14]
+v_max3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xd3,0xd1,0x01,0x00,0x01,0x02]
-v_max3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x04,0x0e,0x1c]
+v_max3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x0a]
+
+v_max3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x12]
+
+v_max3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xd3,0xd1,0x01,0x00,0x01,0x1a]
v_max3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xd4,0xd1,0x01,0x00,0x01,0x02]
@@ -41855,101 +42951,134 @@ v_max3_u32 v5, s1, 0, v3
v_max3_u32 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xd5,0xd1,0x01,0x00,0xfd,0x07]
-v_med3_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xd6,0xd1,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x65,0x00,0x01,0x02]
+
+v_med3_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x66,0x00,0x01,0x02]
+
+v_med3_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x67,0x00,0x01,0x02]
+
+v_med3_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6a,0x00,0x01,0x02]
+
+v_med3_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6b,0x00,0x01,0x02]
+
+v_med3_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6c,0x00,0x01,0x02]
-v_med3_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6d,0x00,0x01,0x02]
-v_med3_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x65,0x04,0x0e,0x04]
+v_med3_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6e,0x00,0x01,0x02]
-v_med3_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x66,0x04,0x0e,0x04]
+v_med3_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x6f,0x00,0x01,0x02]
-v_med3_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x67,0x04,0x0e,0x04]
+v_med3_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x7b,0x00,0x01,0x02]
-v_med3_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6a,0x04,0x0e,0x04]
+v_med3_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x7c,0x00,0x01,0x02]
-v_med3_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6b,0x04,0x0e,0x04]
+v_med3_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x7e,0x00,0x01,0x02]
-v_med3_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6c,0x04,0x0e,0x04]
+v_med3_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x7f,0x00,0x01,0x02]
-v_med3_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6d,0x04,0x0e,0x04]
+v_med3_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x80,0x00,0x01,0x02]
-v_med3_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6e,0x04,0x0e,0x04]
+v_med3_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0xc1,0x00,0x01,0x02]
-v_med3_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x6f,0x04,0x0e,0x04]
+v_med3_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0xf0,0x00,0x01,0x02]
-v_med3_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x7b,0x04,0x0e,0x04]
+v_med3_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0xf7,0x00,0x01,0x02]
-v_med3_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x7c,0x04,0x0e,0x04]
+v_med3_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x01,0x01,0x02]
-v_med3_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x7e,0x04,0x0e,0x04]
+v_med3_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0xff,0x01,0x01,0x02]
-v_med3_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x7f,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x82,0x01,0x02]
-v_med3_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0xfd,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0xe0,0x01,0x02]
-v_med3_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x05,0x0e,0x04]
+v_med3_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0xee,0x01,0x02]
-v_med3_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0xff,0x05,0x0e,0x04]
+v_med3_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x02,0x02]
-v_med3_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0xfe,0x0f,0x04]
+v_med3_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0xfe,0x03,0x02]
-v_med3_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0xfe,0x07]
+v_med3_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x05,0x03]
-v_med3_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x24]
+v_med3_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0xc1,0x03]
-v_med3_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x44]
+v_med3_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0xdd,0x03]
-v_med3_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x84]
+v_med3_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x0d,0x04]
-v_med3_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0xe4]
+v_med3_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0xfd,0x07]
-v_med3_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x22]
-v_med3_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x42]
-v_med3_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x82]
-v_med3_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0xe2]
-v_med3_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xd6,0xd1,0x01,0x04,0x0e,0x04]
+v_med3_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xd6,0xd1,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x0c]
+v_med3_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xd6,0xd1,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x14]
+v_med3_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xd6,0xd1,0x01,0x00,0x01,0x02]
-v_med3_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x04,0x0e,0x1c]
+v_med3_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xd6,0xd1,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xd6,0xd1,0x01,0x00,0x01,0x02]
+
+v_med3_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x0a]
+
+v_med3_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x12]
+
+v_med3_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xd6,0xd1,0x01,0x00,0x01,0x1a]
v_med3_i32 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xd7,0xd1,0x01,0x00,0x01,0x02]
@@ -42554,8 +43683,17 @@ v_cvt_pk_u8_f32 v5, exec_lo, 0, 0
v_cvt_pk_u8_f32 v5, exec_hi, 0, 0
// CHECK: [0x05,0x00,0xdd,0xd1,0x7f,0x00,0x01,0x02]
-v_cvt_pk_u8_f32 v5, scc, 0, 0
-// CHECK: [0x05,0x00,0xdd,0xd1,0xfd,0x00,0x01,0x02]
+v_cvt_pk_u8_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xdd,0xd1,0x80,0x00,0x01,0x02]
+
+v_cvt_pk_u8_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xdd,0xd1,0xc1,0x00,0x01,0x02]
+
+v_cvt_pk_u8_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xdd,0xd1,0xf0,0x00,0x01,0x02]
+
+v_cvt_pk_u8_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xdd,0xd1,0xf7,0x00,0x01,0x02]
v_cvt_pk_u8_f32 v5, v1, 0, 0
// CHECK: [0x05,0x00,0xdd,0xd1,0x01,0x01,0x01,0x02]
@@ -42602,182 +43740,248 @@ v_cvt_pk_u8_f32 v5, |s1|, 0, 0
v_cvt_pk_u8_f32 v5, s1, 0, 0 clamp
// CHECK: [0x05,0x80,0xdd,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f32 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f32 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x65,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x65,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x66,0x00,0x01,0x02]
-v_div_fixup_f32 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x66,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x67,0x00,0x01,0x02]
-v_div_fixup_f32 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x67,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6a,0x00,0x01,0x02]
-v_div_fixup_f32 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6a,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6b,0x00,0x01,0x02]
-v_div_fixup_f32 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6b,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6c,0x00,0x01,0x02]
-v_div_fixup_f32 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6c,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6d,0x00,0x01,0x02]
-v_div_fixup_f32 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6d,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6e,0x00,0x01,0x02]
-v_div_fixup_f32 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x6f,0x00,0x01,0x02]
-v_div_fixup_f32 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x6f,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x7b,0x00,0x01,0x02]
-v_div_fixup_f32 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x7b,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x7c,0x00,0x01,0x02]
-v_div_fixup_f32 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x7c,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x7e,0x00,0x01,0x02]
-v_div_fixup_f32 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x7e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x7f,0x00,0x01,0x02]
-v_div_fixup_f32 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x7f,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x80,0x00,0x01,0x02]
-v_div_fixup_f32 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0xfd,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0xc1,0x00,0x01,0x02]
-v_div_fixup_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fixup_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0xf0,0x00,0x01,0x02]
-v_div_fixup_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0xff,0x05,0x0e,0x04]
+v_div_fixup_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0xf7,0x00,0x01,0x02]
-v_div_fixup_f32 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0xfe,0x0f,0x04]
+v_div_fixup_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x01,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0xfe,0x07]
+v_div_fixup_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0xff,0x01,0x01,0x02]
-v_div_fixup_f32 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x24]
+v_div_fixup_f32 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x82,0x01,0x02]
-v_div_fixup_f32 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x44]
+v_div_fixup_f32 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0xe0,0x01,0x02]
-v_div_fixup_f32 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x84]
+v_div_fixup_f32 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0xee,0x01,0x02]
-v_div_fixup_f32 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0xe4]
+v_div_fixup_f32 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x02,0x02]
-v_div_fixup_f32 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0xfe,0x03,0x02]
-v_div_fixup_f32 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x05,0x03]
-v_div_fixup_f32 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0xc1,0x03]
-v_div_fixup_f32 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0xdd,0x03]
-v_div_fixup_f32 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xde,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x0d,0x04]
-v_div_fixup_f32 v5, s1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x0c]
+v_div_fixup_f32 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0xfd,0x07]
-v_div_fixup_f32 v5, s1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x14]
+v_div_fixup_f32 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x22]
-v_div_fixup_f32 v5, s1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x04,0x0e,0x1c]
+v_div_fixup_f32 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x42]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x82]
-v_div_fixup_f64 v[254:255], s[2:3], v[2:3], v[3:4]
-// CHECK: [0xfe,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0xe2]
-v_div_fixup_f64 v[5:6], s[4:5], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x04,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[100:101], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x64,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], flat_scratch, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x66,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], vcc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x6a,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], tba, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x6c,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xde,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], tma, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x6e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x0a]
-v_div_fixup_f64 v[5:6], ttmp[10:11], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x7a,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x12]
-v_div_fixup_f64 v[5:6], exec, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x7e,0x04,0x0e,0x04]
+v_div_fixup_f32 v5, s1, 0, 0 div:2
+// CHECK: [0x05,0x00,0xde,0xd1,0x01,0x00,0x01,0x1a]
-v_div_fixup_f64 v[5:6], scc, v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0xfd,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fixup_f64 v[254:255], s[2:3], 0, 0
+// CHECK: [0xfe,0x00,0xdf,0xd1,0x02,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], v[254:255], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0xfe,0x05,0x0e,0x04]
+v_div_fixup_f64 v[5:6], s[4:5], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x04,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[254:255], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0xfc,0x0f,0x04]
+v_div_fixup_f64 v[5:6], s[100:101], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x64,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[254:255]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0xfa,0x07]
+v_div_fixup_f64 v[5:6], flat_scratch, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x66,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], -s[2:3], v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x24]
+v_div_fixup_f64 v[5:6], vcc, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x6a,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], -v[2:3], v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x44]
+v_div_fixup_f64 v[5:6], tba, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x6c,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x84]
+v_div_fixup_f64 v[5:6], tma, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x6e,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], -s[2:3], -v[2:3], -v[3:4]
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0xe4]
+v_div_fixup_f64 v[5:6], ttmp[10:11], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x7a,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], |s[2:3]|, v[2:3], v[3:4]
-// CHECK: [0x05,0x01,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], exec, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x7e,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], |v[2:3]|, v[3:4]
-// CHECK: [0x05,0x02,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], 0, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x80,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], |v[3:4]|
-// CHECK: [0x05,0x04,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], -1, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0xc1,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], |s[2:3]|, |v[2:3]|, |v[3:4]|
-// CHECK: [0x05,0x07,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], 0.5, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0xf0,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] clamp
-// CHECK: [0x05,0x80,0xdf,0xd1,0x02,0x04,0x0e,0x04]
+v_div_fixup_f64 v[5:6], -4.0, 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0xf7,0x00,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:2
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x0c]
+v_div_fixup_f64 v[5:6], v[1:2], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x01,0x01,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] mul:4
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x14]
+v_div_fixup_f64 v[5:6], v[254:255], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0xfe,0x01,0x01,0x02]
-v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] div:2
-// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x0e,0x1c]
+v_div_fixup_f64 v[5:6], s[2:3], -1, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x82,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0.5, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0xe0,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], -4.0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0xee,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], v[2:3], 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x04,0x02,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], v[254:255], 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0xfc,0x03,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, -1
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x05,0x03]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0.5
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0xc1,0x03]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, -4.0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0xdd,0x03]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, v[3:4]
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x0d,0x04]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, v[254:255]
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0xf9,0x07]
+
+v_div_fixup_f64 v[5:6], -s[2:3], 0, 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x22]
+
+v_div_fixup_f64 v[5:6], s[2:3], neg(0), 0
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x42]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, neg(0)
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x82]
+
+v_div_fixup_f64 v[5:6], -s[2:3], neg(0), neg(0)
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0xe2]
+
+v_div_fixup_f64 v[5:6], |s[2:3]|, 0, 0
+// CHECK: [0x05,0x01,0xdf,0xd1,0x02,0x00,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], |0|, 0
+// CHECK: [0x05,0x02,0xdf,0xd1,0x02,0x00,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, |0|
+// CHECK: [0x05,0x04,0xdf,0xd1,0x02,0x00,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], |s[2:3]|, |0|, |0|
+// CHECK: [0x05,0x07,0xdf,0xd1,0x02,0x00,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 clamp
+// CHECK: [0x05,0x80,0xdf,0xd1,0x02,0x00,0x01,0x02]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 mul:2
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x0a]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 mul:4
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x12]
+
+v_div_fixup_f64 v[5:6], s[2:3], 0, 0 div:2
+// CHECK: [0x05,0x00,0xdf,0xd1,0x02,0x00,0x01,0x1a]
v_div_scale_f32 v5, vcc, s1, 0, 0
// CHECK: [0x05,0x6a,0xe0,0xd1,0x01,0x00,0x01,0x02]
@@ -42950,56 +44154,92 @@ v_div_scale_f64 v[5:6], vcc, s[2:3], 0, v[3:4]
v_div_scale_f64 v[5:6], vcc, s[2:3], 0, v[254:255]
// CHECK: [0x05,0x6a,0xe1,0xd1,0x02,0x00,0xf9,0x07]
-v_div_fmas_f32 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v255, 0, 0, 0
+// CHECK: [0xff,0x00,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0xc1,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0xf0,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0xf7,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x01,0x01,0x02]
+
+v_div_fmas_f32 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0xff,0x01,0x01,0x02]
-v_div_fmas_f32 v255, v1, v2, v3
-// CHECK: [0xff,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, -1, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x82,0x01,0x02]
-v_div_fmas_f32 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0xff,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0.5, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0xe0,0x01,0x02]
-v_div_fmas_f32 v5, v1, v255, v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0xff,0x0f,0x04]
+v_div_fmas_f32 v5, 0, -4.0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0xee,0x01,0x02]
-v_div_fmas_f32 v5, v1, v2, v255
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0xfe,0x07]
+v_div_fmas_f32 v5, 0, v2, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x04,0x02,0x02]
-v_div_fmas_f32 v5, -v1, v2, v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x24]
+v_div_fmas_f32 v5, 0, v255, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0xfe,0x03,0x02]
-v_div_fmas_f32 v5, v1, -v2, v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x44]
+v_div_fmas_f32 v5, 0, 0, -1
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x05,0x03]
-v_div_fmas_f32 v5, v1, v2, -v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x84]
+v_div_fmas_f32 v5, 0, 0, 0.5
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0xc1,0x03]
-v_div_fmas_f32 v5, -v1, -v2, -v3
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0xe4]
+v_div_fmas_f32 v5, 0, 0, -4.0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0xdd,0x03]
-v_div_fmas_f32 v5, |v1|, v2, v3
-// CHECK: [0x05,0x01,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, v3
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x0d,0x04]
-v_div_fmas_f32 v5, v1, |v2|, v3
-// CHECK: [0x05,0x02,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, v255
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0xfd,0x07]
-v_div_fmas_f32 v5, v1, v2, |v3|
-// CHECK: [0x05,0x04,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, neg(0), 0, 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x22]
-v_div_fmas_f32 v5, |v1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, neg(0), 0
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x42]
-v_div_fmas_f32 v5, v1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xe2,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fmas_f32 v5, 0, 0, neg(0)
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x82]
-v_div_fmas_f32 v5, v1, v2, v3 mul:2
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x0c]
+v_div_fmas_f32 v5, neg(0), neg(0), neg(0)
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0xe2]
-v_div_fmas_f32 v5, v1, v2, v3 mul:4
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x14]
+v_div_fmas_f32 v5, |0|, 0, 0
+// CHECK: [0x05,0x01,0xe2,0xd1,0x80,0x00,0x01,0x02]
-v_div_fmas_f32 v5, v1, v2, v3 div:2
-// CHECK: [0x05,0x00,0xe2,0xd1,0x01,0x05,0x0e,0x1c]
+v_div_fmas_f32 v5, 0, |0|, 0
+// CHECK: [0x05,0x02,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0, |0|
+// CHECK: [0x05,0x04,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, |0|, |0|, |0|
+// CHECK: [0x05,0x07,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0, 0 clamp
+// CHECK: [0x05,0x80,0xe2,0xd1,0x80,0x00,0x01,0x02]
+
+v_div_fmas_f32 v5, 0, 0, 0 mul:2
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x0a]
+
+v_div_fmas_f32 v5, 0, 0, 0 mul:4
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x12]
+
+v_div_fmas_f32 v5, 0, 0, 0 div:2
+// CHECK: [0x05,0x00,0xe2,0xd1,0x80,0x00,0x01,0x1a]
v_div_fmas_f64 v[5:6], vcc, vcc, vcc
// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0xa8,0x01]
@@ -43007,18 +44247,54 @@ v_div_fmas_f64 v[5:6], vcc, vcc, vcc
v_div_fmas_f64 v[254:255], vcc, vcc, vcc
// CHECK: [0xfe,0x00,0xe3,0xd1,0x6a,0xd4,0xa8,0x01]
+v_div_fmas_f64 v[5:6], 0, vcc, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0x80,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], -1, vcc, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0xc1,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], 0.5, vcc, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0xf0,0xd4,0xa8,0x01]
+
+v_div_fmas_f64 v[5:6], -4.0, vcc, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0xf7,0xd4,0xa8,0x01]
+
v_div_fmas_f64 v[5:6], v[1:2], vcc, vcc
// CHECK: [0x05,0x00,0xe3,0xd1,0x01,0xd5,0xa8,0x01]
v_div_fmas_f64 v[5:6], v[254:255], vcc, vcc
// CHECK: [0x05,0x00,0xe3,0xd1,0xfe,0xd5,0xa8,0x01]
+v_div_fmas_f64 v[5:6], vcc, 0, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0x00,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, -1, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0x82,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, 0.5, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xe0,0xa9,0x01]
+
+v_div_fmas_f64 v[5:6], vcc, -4.0, vcc
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xee,0xa9,0x01]
+
v_div_fmas_f64 v[5:6], vcc, v[2:3], vcc
// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0x04,0xaa,0x01]
v_div_fmas_f64 v[5:6], vcc, v[254:255], vcc
// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xfc,0xab,0x01]
+v_div_fmas_f64 v[5:6], vcc, vcc, 0
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0x00,0x02]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, -1
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0x04,0x03]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, 0.5
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0xc0,0x03]
+
+v_div_fmas_f64 v[5:6], vcc, vcc, -4.0
+// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0xdc,0x03]
+
v_div_fmas_f64 v[5:6], vcc, vcc, v[3:4]
// CHECK: [0x05,0x00,0xe3,0xd1,0x6a,0xd4,0x0c,0x04]
@@ -43391,92 +44667,353 @@ v_mqsad_u32_u8 v[5:8], 0, v255, v[3:6]
v_mqsad_u32_u8 v[5:8], 0, s2, v[252:255]
// CHECK: [0x05,0x00,0xe7,0xd1,0x80,0x04,0xf0,0x07]
-v_mad_f16 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[254:255], s[12:13], s1, 0, 0
+// CHECK: [0xfe,0x0c,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[14:15], s1, 0, 0
+// CHECK: [0x05,0x0e,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[100:101], s1, 0, 0
+// CHECK: [0x05,0x64,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], flat_scratch, s1, 0, 0
+// CHECK: [0x05,0x66,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], vcc, s1, 0, 0
+// CHECK: [0x05,0x6a,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], tba, s1, 0, 0
+// CHECK: [0x05,0x6c,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], tma, s1, 0, 0
+// CHECK: [0x05,0x6e,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], ttmp[10:11], s1, 0, 0
+// CHECK: [0x05,0x7a,0xe8,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], s101, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x65,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x66,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x67,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], vcc_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6a,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], vcc_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6b,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tba_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6c,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tba_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6d,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tma_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6e,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], tma_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x6f,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], ttmp11, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x7b,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], m0, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x7c,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x7e,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x7f,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], 0, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x80,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], -1, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0xc1,0x00,0x01,0x02]
+
+v_mad_u64_u32 v[5:6], s[12:13], 0.5, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0xf0,0x00,0x01,0x02]
-v_mad_f16 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], -4.0, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0xf7,0x00,0x01,0x02]
-v_mad_f16 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x65,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], v1, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x01,0x01,0x02]
-v_mad_f16 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x66,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], v255, 0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0xff,0x01,0x01,0x02]
-v_mad_f16 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x67,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, -1, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x82,0x01,0x02]
-v_mad_f16 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6a,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0.5, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0xe0,0x01,0x02]
-v_mad_f16 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6b,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, -4.0, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0xee,0x01,0x02]
-v_mad_f16 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6c,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, v2, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x04,0x02,0x02]
-v_mad_f16 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6d,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, v255, 0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0xfe,0x03,0x02]
-v_mad_f16 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6e,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, -1
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0x05,0x03]
-v_mad_f16 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x6f,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, 0.5
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0xc1,0x03]
-v_mad_f16 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x7b,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, -4.0
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0xdd,0x03]
-v_mad_f16 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x7c,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, v[3:4]
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0x0d,0x04]
-v_mad_f16 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x7e,0x04,0x0e,0x04]
+v_mad_u64_u32 v[5:6], s[12:13], s1, 0, v[254:255]
+// CHECK: [0x05,0x0c,0xe8,0xd1,0x01,0x00,0xf9,0x07]
-v_mad_f16 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x7f,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0xfd,0x04,0x0e,0x04]
+v_mad_i64_i32 v[254:255], s[12:13], s1, 0, 0
+// CHECK: [0xfe,0x0c,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[14:15], s1, 0, 0
+// CHECK: [0x05,0x0e,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0xff,0x05,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[100:101], s1, 0, 0
+// CHECK: [0x05,0x64,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0xfe,0x0f,0x04]
+v_mad_i64_i32 v[5:6], flat_scratch, s1, 0, 0
+// CHECK: [0x05,0x66,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0xfe,0x07]
+v_mad_i64_i32 v[5:6], vcc, s1, 0, 0
+// CHECK: [0x05,0x6a,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x0e,0x24]
+v_mad_i64_i32 v[5:6], tba, s1, 0, 0
+// CHECK: [0x05,0x6c,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x0e,0x44]
+v_mad_i64_i32 v[5:6], tma, s1, 0, 0
+// CHECK: [0x05,0x6e,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x0e,0x84]
+v_mad_i64_i32 v[5:6], ttmp[10:11], s1, 0, 0
+// CHECK: [0x05,0x7a,0xe9,0xd1,0x01,0x00,0x01,0x02]
-v_mad_f16 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x0e,0xe4]
+v_mad_i64_i32 v[5:6], s[12:13], s101, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x65,0x00,0x01,0x02]
-v_mad_f16 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x66,0x00,0x01,0x02]
-v_mad_f16 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x67,0x00,0x01,0x02]
-v_mad_f16 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], vcc_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6a,0x00,0x01,0x02]
-v_mad_f16 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], vcc_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6b,0x00,0x01,0x02]
-v_mad_f16 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xea,0xd1,0x01,0x04,0x0e,0x04]
+v_mad_i64_i32 v[5:6], s[12:13], tba_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6c,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tba_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6d,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tma_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6e,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], tma_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x6f,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], ttmp11, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x7b,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], m0, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x7c,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_lo, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x7e,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_hi, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x7f,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], 0, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x80,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], -1, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0xc1,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], 0.5, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0xf0,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], -4.0, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0xf7,0x00,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], v1, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x01,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], v255, 0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0xff,0x01,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, -1, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x82,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0.5, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0xe0,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, -4.0, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0xee,0x01,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, v2, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x04,0x02,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, v255, 0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0xfe,0x03,0x02]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, -1
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0x05,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, 0.5
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0xc1,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, -4.0
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0xdd,0x03]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, v[3:4]
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0x0d,0x04]
+
+v_mad_i64_i32 v[5:6], s[12:13], s1, 0, v[254:255]
+// CHECK: [0x05,0x0c,0xe9,0xd1,0x01,0x00,0xf9,0x07]
+
+v_mad_f16 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x65,0x00,0x01,0x02]
+
+v_mad_f16 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x66,0x00,0x01,0x02]
+
+v_mad_f16 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x67,0x00,0x01,0x02]
+
+v_mad_f16 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6a,0x00,0x01,0x02]
+
+v_mad_f16 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6b,0x00,0x01,0x02]
+
+v_mad_f16 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6c,0x00,0x01,0x02]
+
+v_mad_f16 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6d,0x00,0x01,0x02]
+
+v_mad_f16 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6e,0x00,0x01,0x02]
+
+v_mad_f16 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x6f,0x00,0x01,0x02]
+
+v_mad_f16 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x7b,0x00,0x01,0x02]
+
+v_mad_f16 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x7c,0x00,0x01,0x02]
+
+v_mad_f16 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x7e,0x00,0x01,0x02]
+
+v_mad_f16 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x7f,0x00,0x01,0x02]
+
+v_mad_f16 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x80,0x00,0x01,0x02]
+
+v_mad_f16 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0xc1,0x00,0x01,0x02]
+
+v_mad_f16 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0xf0,0x00,0x01,0x02]
+
+v_mad_f16 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0xf7,0x00,0x01,0x02]
+
+v_mad_f16 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x01,0x01,0x02]
+
+v_mad_f16 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0xff,0x01,0x01,0x02]
+
+v_mad_f16 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x82,0x01,0x02]
+
+v_mad_f16 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0xe0,0x01,0x02]
+
+v_mad_f16 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0xee,0x01,0x02]
+
+v_mad_f16 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x04,0x02,0x02]
+
+v_mad_f16 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0xfe,0x03,0x02]
+
+v_mad_f16 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x05,0x03]
+
+v_mad_f16 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0xc1,0x03]
+
+v_mad_f16 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0xdd,0x03]
+
+v_mad_f16 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x0d,0x04]
+
+v_mad_f16 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0xfd,0x07]
+
+v_mad_f16 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x01,0x22]
+
+v_mad_f16 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x01,0x42]
+
+v_mad_f16 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x01,0x82]
+
+v_mad_f16 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xea,0xd1,0x01,0x00,0x01,0xe2]
+
+v_mad_f16 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xea,0xd1,0x01,0x00,0x01,0x02]
+
+v_mad_f16 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xea,0xd1,0x01,0x00,0x01,0x02]
v_mad_u16 v5, s1, 0, 0
// CHECK: [0x05,0x00,0xeb,0xd1,0x01,0x00,0x01,0x02]
@@ -43664,257 +45201,332 @@ v_mad_i16 v5, s1, 0, v3
v_mad_i16 v5, s1, 0, v255
// CHECK: [0x05,0x00,0xec,0xd1,0x01,0x00,0xfd,0x07]
-v_fma_f16 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x01,0x02]
+
+v_fma_f16 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xee,0xd1,0x01,0x00,0x01,0x02]
+
+v_fma_f16 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x65,0x00,0x01,0x02]
+
+v_fma_f16 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x66,0x00,0x01,0x02]
+
+v_fma_f16 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x67,0x00,0x01,0x02]
+
+v_fma_f16 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6a,0x00,0x01,0x02]
+
+v_fma_f16 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6b,0x00,0x01,0x02]
-v_fma_f16 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6c,0x00,0x01,0x02]
-v_fma_f16 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x65,0x04,0x0e,0x04]
+v_fma_f16 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6d,0x00,0x01,0x02]
-v_fma_f16 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x66,0x04,0x0e,0x04]
+v_fma_f16 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6e,0x00,0x01,0x02]
-v_fma_f16 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x67,0x04,0x0e,0x04]
+v_fma_f16 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x6f,0x00,0x01,0x02]
-v_fma_f16 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6a,0x04,0x0e,0x04]
+v_fma_f16 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x7b,0x00,0x01,0x02]
-v_fma_f16 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6b,0x04,0x0e,0x04]
+v_fma_f16 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x7c,0x00,0x01,0x02]
-v_fma_f16 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6c,0x04,0x0e,0x04]
+v_fma_f16 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x7e,0x00,0x01,0x02]
-v_fma_f16 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6d,0x04,0x0e,0x04]
+v_fma_f16 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x7f,0x00,0x01,0x02]
-v_fma_f16 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6e,0x04,0x0e,0x04]
+v_fma_f16 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x80,0x00,0x01,0x02]
-v_fma_f16 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x6f,0x04,0x0e,0x04]
+v_fma_f16 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0xc1,0x00,0x01,0x02]
-v_fma_f16 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x7b,0x04,0x0e,0x04]
+v_fma_f16 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0xf0,0x00,0x01,0x02]
-v_fma_f16 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x7c,0x04,0x0e,0x04]
+v_fma_f16 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0xf7,0x00,0x01,0x02]
-v_fma_f16 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x7e,0x04,0x0e,0x04]
+v_fma_f16 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x01,0x01,0x02]
-v_fma_f16 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x7f,0x04,0x0e,0x04]
+v_fma_f16 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0xff,0x01,0x01,0x02]
-v_fma_f16 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0xfd,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x82,0x01,0x02]
-v_fma_f16 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
+v_fma_f16 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0xe0,0x01,0x02]
-v_fma_f16 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0xff,0x05,0x0e,0x04]
+v_fma_f16 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0xee,0x01,0x02]
-v_fma_f16 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0xfe,0x0f,0x04]
+v_fma_f16 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x02,0x02]
-v_fma_f16 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0xfe,0x07]
+v_fma_f16 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0xfe,0x03,0x02]
-v_fma_f16 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x0e,0x24]
+v_fma_f16 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x05,0x03]
-v_fma_f16 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x0e,0x44]
+v_fma_f16 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0xc1,0x03]
-v_fma_f16 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x0e,0x84]
+v_fma_f16 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0xdd,0x03]
-v_fma_f16 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x04,0x0e,0xe4]
+v_fma_f16 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x0d,0x04]
-v_fma_f16 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0xfd,0x07]
-v_fma_f16 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x01,0x22]
-v_fma_f16 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x01,0x42]
-v_fma_f16 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x01,0x82]
-v_fma_f16 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xee,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xee,0xd1,0x01,0x00,0x01,0xe2]
-v_div_fixup_f16 v5, s1, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xee,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v255, s1, v2, v3
-// CHECK: [0xff,0x00,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xee,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, s101, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x65,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xee,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, flat_scratch_lo, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x66,0x04,0x0e,0x04]
+v_fma_f16 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xee,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, flat_scratch_hi, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x67,0x04,0x0e,0x04]
+v_fma_f16 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xee,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, vcc_lo, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6a,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, s1, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, vcc_hi, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6b,0x04,0x0e,0x04]
+v_div_fixup_f16 v255, s1, 0, 0
+// CHECK: [0xff,0x00,0xef,0xd1,0x01,0x00,0x01,0x02]
-v_div_fixup_f16 v5, tba_lo, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6c,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, s101, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x65,0x00,0x01,0x02]
-v_div_fixup_f16 v5, tba_hi, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6d,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, flat_scratch_lo, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x66,0x00,0x01,0x02]
-v_div_fixup_f16 v5, tma_lo, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6e,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, flat_scratch_hi, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x67,0x00,0x01,0x02]
-v_div_fixup_f16 v5, tma_hi, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x6f,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, vcc_lo, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6a,0x00,0x01,0x02]
-v_div_fixup_f16 v5, ttmp11, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x7b,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, vcc_hi, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6b,0x00,0x01,0x02]
-v_div_fixup_f16 v5, m0, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x7c,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, tba_lo, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6c,0x00,0x01,0x02]
-v_div_fixup_f16 v5, exec_lo, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x7e,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, tba_hi, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6d,0x00,0x01,0x02]
-v_div_fixup_f16 v5, exec_hi, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x7f,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, tma_lo, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6e,0x00,0x01,0x02]
-v_div_fixup_f16 v5, scc, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0xfd,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, tma_hi, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x6f,0x00,0x01,0x02]
-v_div_fixup_f16 v5, v1, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04]
+v_div_fixup_f16 v5, ttmp11, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x7b,0x00,0x01,0x02]
-v_div_fixup_f16 v5, v255, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0xff,0x05,0x0e,0x04]
+v_div_fixup_f16 v5, m0, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x7c,0x00,0x01,0x02]
-v_div_fixup_f16 v5, s1, v255, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0xfe,0x0f,0x04]
+v_div_fixup_f16 v5, exec_lo, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x7e,0x00,0x01,0x02]
-v_div_fixup_f16 v5, s1, v2, v255
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0xfe,0x07]
+v_div_fixup_f16 v5, exec_hi, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x7f,0x00,0x01,0x02]
-v_div_fixup_f16 v5, -s1, v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x0e,0x24]
+v_div_fixup_f16 v5, 0, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x80,0x00,0x01,0x02]
-v_div_fixup_f16 v5, s1, -v2, v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x0e,0x44]
+v_div_fixup_f16 v5, -1, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0xc1,0x00,0x01,0x02]
-v_div_fixup_f16 v5, s1, v2, -v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x0e,0x84]
+v_div_fixup_f16 v5, 0.5, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0xf0,0x00,0x01,0x02]
-v_div_fixup_f16 v5, -s1, -v2, -v3
-// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x0e,0xe4]
+v_div_fixup_f16 v5, -4.0, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0xf7,0x00,0x01,0x02]
-v_div_fixup_f16 v5, |s1|, v2, v3
-// CHECK: [0x05,0x01,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, v1, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x01,0x01,0x02]
-v_div_fixup_f16 v5, s1, |v2|, v3
-// CHECK: [0x05,0x02,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, v255, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0xff,0x01,0x01,0x02]
-v_div_fixup_f16 v5, s1, v2, |v3|
-// CHECK: [0x05,0x04,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, s1, -1, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x82,0x01,0x02]
-v_div_fixup_f16 v5, |s1|, |v2|, |v3|
-// CHECK: [0x05,0x07,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, s1, 0.5, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0xe0,0x01,0x02]
-v_div_fixup_f16 v5, s1, v2, v3 clamp
-// CHECK: [0x05,0x80,0xef,0xd1,0x01,0x04,0x0e,0x04]
+v_div_fixup_f16 v5, s1, -4.0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0xee,0x01,0x02]
+
+v_div_fixup_f16 v5, s1, v2, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x04,0x02,0x02]
+
+v_div_fixup_f16 v5, s1, v255, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0xfe,0x03,0x02]
+
+v_div_fixup_f16 v5, s1, 0, -1
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x05,0x03]
+
+v_div_fixup_f16 v5, s1, 0, 0.5
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0xc1,0x03]
+
+v_div_fixup_f16 v5, s1, 0, -4.0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0xdd,0x03]
+
+v_div_fixup_f16 v5, s1, 0, v3
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x0d,0x04]
+
+v_div_fixup_f16 v5, s1, 0, v255
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0xfd,0x07]
+
+v_div_fixup_f16 v5, -s1, 0, 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x01,0x22]
+
+v_div_fixup_f16 v5, s1, neg(0), 0
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x01,0x42]
+
+v_div_fixup_f16 v5, s1, 0, neg(0)
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x01,0x82]
+
+v_div_fixup_f16 v5, -s1, neg(0), neg(0)
+// CHECK: [0x05,0x00,0xef,0xd1,0x01,0x00,0x01,0xe2]
+
+v_div_fixup_f16 v5, |s1|, 0, 0
+// CHECK: [0x05,0x01,0xef,0xd1,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f16 v5, s1, |0|, 0
+// CHECK: [0x05,0x02,0xef,0xd1,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f16 v5, s1, 0, |0|
+// CHECK: [0x05,0x04,0xef,0xd1,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f16 v5, |s1|, |0|, |0|
+// CHECK: [0x05,0x07,0xef,0xd1,0x01,0x00,0x01,0x02]
+
+v_div_fixup_f16 v5, s1, 0, 0 clamp
+// CHECK: [0x05,0x80,0xef,0xd1,0x01,0x00,0x01,0x02]
+
+v_cvt_pkaccum_u8_f32 v5, 0, s2
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32 v255, 0, s2
+// CHECK: [0xff,0x00,0xf0,0xd1,0x80,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32 v5, -1, s2
+// CHECK: [0x05,0x00,0xf0,0xd1,0xc1,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32 v5, 0.5, s2
+// CHECK: [0x05,0x00,0xf0,0xd1,0xf0,0x04,0x00,0x00]
+
+v_cvt_pkaccum_u8_f32 v5, -4.0, s2
+// CHECK: [0x05,0x00,0xf0,0xd1,0xf7,0x04,0x00,0x00]
v_cvt_pkaccum_u8_f32 v5, v1, s2
// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0x05,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v255, v1, s2
-// CHECK: [0xff,0x00,0xf0,0xd1,0x01,0x05,0x00,0x00]
-
v_cvt_pkaccum_u8_f32 v5, v255, s2
// CHECK: [0x05,0x00,0xf0,0xd1,0xff,0x05,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, s101
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xcb,0x00,0x00]
-
-v_cvt_pkaccum_u8_f32 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xcd,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, s101
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xca,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xcf,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xcc,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xd5,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xce,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xd7,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xd4,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xd9,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xd6,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xdb,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xd8,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xdd,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xda,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xdf,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xdc,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xf7,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xde,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, m0
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xf9,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xf6,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xfd,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, m0
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xf8,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xff,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xfc,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, 0
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0x01,0x01,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xfe,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, -1
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0x83,0x01,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, 0
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0x00,0x01,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, 0.5
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xe1,0x01,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, -1
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0x82,0x01,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, -4.0
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xef,0x01,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, 0.5
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xe0,0x01,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, scc
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xfb,0x01,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, -4.0
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xee,0x01,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, v2
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0x05,0x02,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, v2
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0x04,0x02,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, v255
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0xff,0x03,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, v255
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0xfe,0x03,0x00]
-v_cvt_pkaccum_u8_f32 v5, -v1, s2
-// CHECK: [0x05,0x00,0xf0,0xd1,0x01,0x05,0x00,0x20]
+v_cvt_pkaccum_u8_f32 v5, neg(0), s2
+// CHECK: [0x05,0x00,0xf0,0xd1,0x80,0x04,0x00,0x20]
-v_cvt_pkaccum_u8_f32 v5, |v1|, s2
-// CHECK: [0x05,0x01,0xf0,0xd1,0x01,0x05,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, |0|, s2
+// CHECK: [0x05,0x01,0xf0,0xd1,0x80,0x04,0x00,0x00]
-v_cvt_pkaccum_u8_f32 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0xf0,0xd1,0x01,0x05,0x00,0x00]
+v_cvt_pkaccum_u8_f32 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0xf0,0xd1,0x80,0x04,0x00,0x00]
v_add_f64 v[5:6], s[4:5], s[4:5]
// CHECK: [0x05,0x00,0x80,0xd2,0x04,0x08,0x00,0x00]
@@ -43922,12 +45534,36 @@ v_add_f64 v[5:6], s[4:5], s[4:5]
v_add_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0x80,0xd2,0x04,0x08,0x00,0x00]
+v_add_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0x80,0xd2,0x80,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0x80,0xd2,0xc1,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0x80,0xd2,0xf0,0x08,0x00,0x00]
+
+v_add_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0x80,0xd2,0xf7,0x08,0x00,0x00]
+
v_add_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0x80,0xd2,0x01,0x09,0x00,0x00]
v_add_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0x80,0xd2,0xfe,0x09,0x00,0x00]
+v_add_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0x80,0xd2,0x04,0x00,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0x80,0xd2,0x04,0x82,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0x80,0xd2,0x04,0xe0,0x01,0x00]
+
+v_add_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0x80,0xd2,0x04,0xee,0x01,0x00]
+
v_add_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0x80,0xd2,0x04,0x04,0x02,0x00]
@@ -43970,12 +45606,36 @@ v_mul_f64 v[5:6], s[4:5], s[4:5]
v_mul_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0x81,0xd2,0x04,0x08,0x00,0x00]
+v_mul_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0x81,0xd2,0x80,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0x81,0xd2,0xc1,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0x81,0xd2,0xf0,0x08,0x00,0x00]
+
+v_mul_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0x81,0xd2,0xf7,0x08,0x00,0x00]
+
v_mul_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0x81,0xd2,0x01,0x09,0x00,0x00]
v_mul_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0x81,0xd2,0xfe,0x09,0x00,0x00]
+v_mul_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0x81,0xd2,0x04,0x00,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0x81,0xd2,0x04,0x82,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0x81,0xd2,0x04,0xe0,0x01,0x00]
+
+v_mul_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0x81,0xd2,0x04,0xee,0x01,0x00]
+
v_mul_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0x81,0xd2,0x04,0x04,0x02,0x00]
@@ -44018,12 +45678,36 @@ v_min_f64 v[5:6], s[4:5], s[4:5]
v_min_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0x82,0xd2,0x04,0x08,0x00,0x00]
+v_min_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0x82,0xd2,0x80,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0x82,0xd2,0xc1,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0x82,0xd2,0xf0,0x08,0x00,0x00]
+
+v_min_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0x82,0xd2,0xf7,0x08,0x00,0x00]
+
v_min_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0x82,0xd2,0x01,0x09,0x00,0x00]
v_min_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0x82,0xd2,0xfe,0x09,0x00,0x00]
+v_min_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0x82,0xd2,0x04,0x00,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0x82,0xd2,0x04,0x82,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0x82,0xd2,0x04,0xe0,0x01,0x00]
+
+v_min_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0x82,0xd2,0x04,0xee,0x01,0x00]
+
v_min_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0x82,0xd2,0x04,0x04,0x02,0x00]
@@ -44066,12 +45750,36 @@ v_max_f64 v[5:6], s[4:5], s[4:5]
v_max_f64 v[254:255], s[4:5], s[4:5]
// CHECK: [0xfe,0x00,0x83,0xd2,0x04,0x08,0x00,0x00]
+v_max_f64 v[5:6], 0, s[4:5]
+// CHECK: [0x05,0x00,0x83,0xd2,0x80,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], -1, s[4:5]
+// CHECK: [0x05,0x00,0x83,0xd2,0xc1,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], 0.5, s[4:5]
+// CHECK: [0x05,0x00,0x83,0xd2,0xf0,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], -4.0, s[4:5]
+// CHECK: [0x05,0x00,0x83,0xd2,0xf7,0x08,0x00,0x00]
+
v_max_f64 v[5:6], v[1:2], s[4:5]
// CHECK: [0x05,0x00,0x83,0xd2,0x01,0x09,0x00,0x00]
v_max_f64 v[5:6], v[254:255], s[4:5]
// CHECK: [0x05,0x00,0x83,0xd2,0xfe,0x09,0x00,0x00]
+v_max_f64 v[5:6], s[4:5], 0
+// CHECK: [0x05,0x00,0x83,0xd2,0x04,0x00,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], -1
+// CHECK: [0x05,0x00,0x83,0xd2,0x04,0x82,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], 0.5
+// CHECK: [0x05,0x00,0x83,0xd2,0x04,0xe0,0x01,0x00]
+
+v_max_f64 v[5:6], s[4:5], -4.0
+// CHECK: [0x05,0x00,0x83,0xd2,0x04,0xee,0x01,0x00]
+
v_max_f64 v[5:6], s[4:5], v[2:3]
// CHECK: [0x05,0x00,0x83,0xd2,0x04,0x04,0x02,0x00]
@@ -44114,9 +45822,15 @@ v_ldexp_f64 v[5:6], 0, s2
v_ldexp_f64 v[254:255], 0, s2
// CHECK: [0xfe,0x00,0x84,0xd2,0x80,0x04,0x00,0x00]
+v_ldexp_f64 v[5:6], -1, s2
+// CHECK: [0x05,0x00,0x84,0xd2,0xc1,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], 0.5, s2
// CHECK: [0x05,0x00,0x84,0xd2,0xf0,0x04,0x00,0x00]
+v_ldexp_f64 v[5:6], -4.0, s2
+// CHECK: [0x05,0x00,0x84,0xd2,0xf7,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], v[1:2], s2
// CHECK: [0x05,0x00,0x84,0xd2,0x01,0x05,0x00,0x00]
@@ -44174,15 +45888,18 @@ v_ldexp_f64 v[5:6], 0, 0.5
v_ldexp_f64 v[5:6], 0, -4.0
// CHECK: [0x05,0x00,0x84,0xd2,0x80,0xee,0x01,0x00]
-v_ldexp_f64 v[5:6], 0, scc
-// CHECK: [0x05,0x00,0x84,0xd2,0x80,0xfa,0x01,0x00]
-
v_ldexp_f64 v[5:6], 0, v2
// CHECK: [0x05,0x00,0x84,0xd2,0x80,0x04,0x02,0x00]
v_ldexp_f64 v[5:6], 0, v255
// CHECK: [0x05,0x00,0x84,0xd2,0x80,0xfe,0x03,0x00]
+v_ldexp_f64 v[5:6], neg(0), s2
+// CHECK: [0x05,0x00,0x84,0xd2,0x80,0x04,0x00,0x20]
+
+v_ldexp_f64 v[5:6], |0|, s2
+// CHECK: [0x05,0x01,0x84,0xd2,0x80,0x04,0x00,0x00]
+
v_ldexp_f64 v[5:6], 0, s2 clamp
// CHECK: [0x05,0x80,0x84,0xd2,0x80,0x04,0x00,0x00]
@@ -44429,92 +46146,101 @@ v_mul_hi_i32 v5, 0, v2
v_mul_hi_i32 v5, 0, v255
// CHECK: [0x05,0x00,0x87,0xd2,0x80,0xfe,0x03,0x00]
+v_ldexp_f32 v5, 0, s2
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x00,0x00]
+
+v_ldexp_f32 v255, 0, s2
+// CHECK: [0xff,0x00,0x88,0xd2,0x80,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, -1, s2
+// CHECK: [0x05,0x00,0x88,0xd2,0xc1,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x88,0xd2,0xf0,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x88,0xd2,0xf7,0x04,0x00,0x00]
+
v_ldexp_f32 v5, v1, s2
// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x00,0x00]
-v_ldexp_f32 v255, v1, s2
-// CHECK: [0xff,0x00,0x88,0xd2,0x01,0x05,0x00,0x00]
-
v_ldexp_f32 v5, v255, s2
// CHECK: [0x05,0x00,0x88,0xd2,0xff,0x05,0x00,0x00]
-v_ldexp_f32 v5, v1, s101
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xcb,0x00,0x00]
-
-v_ldexp_f32 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xcd,0x00,0x00]
+v_ldexp_f32 v5, 0, s101
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xca,0x00,0x00]
-v_ldexp_f32 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xcf,0x00,0x00]
+v_ldexp_f32 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xcc,0x00,0x00]
-v_ldexp_f32 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xd5,0x00,0x00]
+v_ldexp_f32 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xce,0x00,0x00]
-v_ldexp_f32 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xd7,0x00,0x00]
+v_ldexp_f32 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xd4,0x00,0x00]
-v_ldexp_f32 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xd9,0x00,0x00]
+v_ldexp_f32 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xd6,0x00,0x00]
-v_ldexp_f32 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xdb,0x00,0x00]
+v_ldexp_f32 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xd8,0x00,0x00]
-v_ldexp_f32 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xdd,0x00,0x00]
+v_ldexp_f32 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xda,0x00,0x00]
-v_ldexp_f32 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xdf,0x00,0x00]
+v_ldexp_f32 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xdc,0x00,0x00]
-v_ldexp_f32 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xf7,0x00,0x00]
+v_ldexp_f32 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xde,0x00,0x00]
-v_ldexp_f32 v5, v1, m0
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xf9,0x00,0x00]
+v_ldexp_f32 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xf6,0x00,0x00]
-v_ldexp_f32 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xfd,0x00,0x00]
+v_ldexp_f32 v5, 0, m0
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xf8,0x00,0x00]
-v_ldexp_f32 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xff,0x00,0x00]
+v_ldexp_f32 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xfc,0x00,0x00]
-v_ldexp_f32 v5, v1, 0
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x01,0x01,0x00]
+v_ldexp_f32 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xfe,0x00,0x00]
-v_ldexp_f32 v5, v1, -1
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x83,0x01,0x00]
+v_ldexp_f32 v5, 0, 0
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x00,0x01,0x00]
-v_ldexp_f32 v5, v1, 0.5
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xe1,0x01,0x00]
+v_ldexp_f32 v5, 0, -1
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x82,0x01,0x00]
-v_ldexp_f32 v5, v1, -4.0
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xef,0x01,0x00]
+v_ldexp_f32 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xe0,0x01,0x00]
-v_ldexp_f32 v5, v1, scc
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xfb,0x01,0x00]
+v_ldexp_f32 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xee,0x01,0x00]
-v_ldexp_f32 v5, v1, v2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x02,0x00]
+v_ldexp_f32 v5, 0, v2
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x02,0x00]
-v_ldexp_f32 v5, v1, v255
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0xff,0x03,0x00]
+v_ldexp_f32 v5, 0, v255
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0xfe,0x03,0x00]
-v_ldexp_f32 v5, -v1, s2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x00,0x20]
+v_ldexp_f32 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x00,0x20]
-v_ldexp_f32 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x88,0xd2,0x01,0x05,0x00,0x00]
+v_ldexp_f32 v5, |0|, s2
+// CHECK: [0x05,0x01,0x88,0xd2,0x80,0x04,0x00,0x00]
-v_ldexp_f32 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x88,0xd2,0x01,0x05,0x00,0x00]
+v_ldexp_f32 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x88,0xd2,0x80,0x04,0x00,0x00]
-v_ldexp_f32 v5, v1, s2 mul:2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x00,0x08]
+v_ldexp_f32 v5, 0, s2 mul:2
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x00,0x08]
-v_ldexp_f32 v5, v1, s2 mul:4
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x00,0x10]
+v_ldexp_f32 v5, 0, s2 mul:4
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x00,0x10]
-v_ldexp_f32 v5, v1, s2 div:2
-// CHECK: [0x05,0x00,0x88,0xd2,0x01,0x05,0x00,0x18]
+v_ldexp_f32 v5, 0, s2 div:2
+// CHECK: [0x05,0x00,0x88,0xd2,0x80,0x04,0x00,0x18]
v_readlane_b32 s5, v1, s2
// CHECK: [0x05,0x00,0x89,0xd2,0x01,0x05,0x00,0x00]
@@ -45056,9 +46782,15 @@ v_trig_preop_f64 v[5:6], 0, s2
v_trig_preop_f64 v[254:255], 0, s2
// CHECK: [0xfe,0x00,0x92,0xd2,0x80,0x04,0x00,0x00]
+v_trig_preop_f64 v[5:6], -1, s2
+// CHECK: [0x05,0x00,0x92,0xd2,0xc1,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], 0.5, s2
// CHECK: [0x05,0x00,0x92,0xd2,0xf0,0x04,0x00,0x00]
+v_trig_preop_f64 v[5:6], -4.0, s2
+// CHECK: [0x05,0x00,0x92,0xd2,0xf7,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], v[1:2], s2
// CHECK: [0x05,0x00,0x92,0xd2,0x01,0x05,0x00,0x00]
@@ -45116,15 +46848,18 @@ v_trig_preop_f64 v[5:6], 0, 0.5
v_trig_preop_f64 v[5:6], 0, -4.0
// CHECK: [0x05,0x00,0x92,0xd2,0x80,0xee,0x01,0x00]
-v_trig_preop_f64 v[5:6], 0, scc
-// CHECK: [0x05,0x00,0x92,0xd2,0x80,0xfa,0x01,0x00]
-
v_trig_preop_f64 v[5:6], 0, v2
// CHECK: [0x05,0x00,0x92,0xd2,0x80,0x04,0x02,0x00]
v_trig_preop_f64 v[5:6], 0, v255
// CHECK: [0x05,0x00,0x92,0xd2,0x80,0xfe,0x03,0x00]
+v_trig_preop_f64 v[5:6], neg(0), s2
+// CHECK: [0x05,0x00,0x92,0xd2,0x80,0x04,0x00,0x20]
+
+v_trig_preop_f64 v[5:6], |0|, s2
+// CHECK: [0x05,0x01,0x92,0xd2,0x80,0x04,0x00,0x00]
+
v_trig_preop_f64 v[5:6], 0, s2 clamp
// CHECK: [0x05,0x80,0x92,0xd2,0x80,0x04,0x00,0x00]
@@ -45215,239 +46950,302 @@ v_bfm_b32 v5, 0, v2
v_bfm_b32 v5, 0, v255
// CHECK: [0x05,0x00,0x93,0xd2,0x80,0xfe,0x03,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, s2
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v255, 0, s2
+// CHECK: [0xff,0x00,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, -1, s2
+// CHECK: [0x05,0x00,0x94,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x94,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x94,0xd2,0xf7,0x04,0x00,0x00]
+
v_cvt_pknorm_i16_f32 v5, v1, s2
// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pknorm_i16_f32 v255, v1, s2
-// CHECK: [0xff,0x00,0x94,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pknorm_i16_f32 v5, v255, s2
// CHECK: [0x05,0x00,0x94,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, s101
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xcb,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, s101
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xca,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xcd,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xcc,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xce,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, m0
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, m0
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, scc
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, 0
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, v2
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, -1
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, v255
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pknorm_i16_f32 v5, -v1, s2
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pknorm_i16_f32 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pknorm_i16_f32 v5, v1, -s2
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pknorm_i16_f32 v5, 0, v2
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pknorm_i16_f32 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x94,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pknorm_i16_f32 v5, 0, v255
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pknorm_i16_f32 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x94,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pknorm_i16_f32 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x94,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, 0, -s2
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pknorm_i16_f32 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x94,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x94,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pknorm_i16_f32 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x94,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_i16_f32 v5, |0|, s2
+// CHECK: [0x05,0x01,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_i16_f32 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x94,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, 0, s2
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v255, 0, s2
+// CHECK: [0xff,0x00,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, -1, s2
+// CHECK: [0x05,0x00,0x95,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x95,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x95,0xd2,0xf7,0x04,0x00,0x00]
v_cvt_pknorm_u16_f32 v5, v1, s2
// CHECK: [0x05,0x00,0x95,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pknorm_u16_f32 v255, v1, s2
-// CHECK: [0xff,0x00,0x95,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pknorm_u16_f32 v5, v255, s2
// CHECK: [0x05,0x00,0x95,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, s101
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xcb,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, s101
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xca,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xcd,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xcc,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xce,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, m0
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, m0
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, scc
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, 0
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, v2
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, -1
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, v255
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pknorm_u16_f32 v5, -v1, s2
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pknorm_u16_f32 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pknorm_u16_f32 v5, v1, -s2
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pknorm_u16_f32 v5, 0, v2
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pknorm_u16_f32 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x95,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pknorm_u16_f32 v5, 0, v255
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pknorm_u16_f32 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x95,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pknorm_u16_f32 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x95,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, 0, -s2
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pknorm_u16_f32 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x95,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x95,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pknorm_u16_f32 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x95,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pknorm_u16_f32 v5, |0|, s2
+// CHECK: [0x05,0x01,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pknorm_u16_f32 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x95,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, 0, s2
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v255, 0, s2
+// CHECK: [0xff,0x00,0x96,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, -1, s2
+// CHECK: [0x05,0x00,0x96,0xd2,0xc1,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, 0.5, s2
+// CHECK: [0x05,0x00,0x96,0xd2,0xf0,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, -4.0, s2
+// CHECK: [0x05,0x00,0x96,0xd2,0xf7,0x04,0x00,0x00]
v_cvt_pkrtz_f16_f32 v5, v1, s2
// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v255, v1, s2
-// CHECK: [0xff,0x00,0x96,0xd2,0x01,0x05,0x00,0x00]
-
v_cvt_pkrtz_f16_f32 v5, v255, s2
// CHECK: [0x05,0x00,0x96,0xd2,0xff,0x05,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, s101
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xcb,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, s101
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xca,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, flat_scratch_lo
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xcd,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, flat_scratch_lo
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xcc,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, flat_scratch_hi
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xcf,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, flat_scratch_hi
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xce,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, vcc_lo
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xd5,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, vcc_lo
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xd4,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, vcc_hi
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xd7,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, vcc_hi
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xd6,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, tba_lo
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xd9,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, tba_lo
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xd8,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, tba_hi
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xdb,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, tba_hi
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xda,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, tma_lo
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xdd,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, tma_lo
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xdc,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, tma_hi
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xdf,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, tma_hi
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xde,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, ttmp11
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xf7,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, ttmp11
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xf6,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, m0
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xf9,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, m0
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xf8,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, exec_lo
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xfd,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, exec_lo
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xfc,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, exec_hi
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xff,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, exec_hi
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xfe,0x00,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, scc
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xfb,0x01,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, 0
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x00,0x01,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, v2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x02,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, -1
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x82,0x01,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, v255
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0xff,0x03,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, 0.5
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xe0,0x01,0x00]
-v_cvt_pkrtz_f16_f32 v5, -v1, s2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x00,0x20]
+v_cvt_pkrtz_f16_f32 v5, 0, -4.0
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xee,0x01,0x00]
-v_cvt_pkrtz_f16_f32 v5, v1, -s2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x00,0x40]
+v_cvt_pkrtz_f16_f32 v5, 0, v2
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x04,0x02,0x00]
-v_cvt_pkrtz_f16_f32 v5, -v1, -s2
-// CHECK: [0x05,0x00,0x96,0xd2,0x01,0x05,0x00,0x60]
+v_cvt_pkrtz_f16_f32 v5, 0, v255
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0xfe,0x03,0x00]
-v_cvt_pkrtz_f16_f32 v5, |v1|, s2
-// CHECK: [0x05,0x01,0x96,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, neg(0), s2
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x04,0x00,0x20]
-v_cvt_pkrtz_f16_f32 v5, v1, |s2|
-// CHECK: [0x05,0x02,0x96,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, 0, -s2
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x04,0x00,0x40]
-v_cvt_pkrtz_f16_f32 v5, |v1|, |s2|
-// CHECK: [0x05,0x03,0x96,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, neg(0), -s2
+// CHECK: [0x05,0x00,0x96,0xd2,0x80,0x04,0x00,0x60]
-v_cvt_pkrtz_f16_f32 v5, v1, s2 clamp
-// CHECK: [0x05,0x80,0x96,0xd2,0x01,0x05,0x00,0x00]
+v_cvt_pkrtz_f16_f32 v5, |0|, s2
+// CHECK: [0x05,0x01,0x96,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, 0, |s2|
+// CHECK: [0x05,0x02,0x96,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, |0|, |s2|
+// CHECK: [0x05,0x03,0x96,0xd2,0x80,0x04,0x00,0x00]
+
+v_cvt_pkrtz_f16_f32 v5, 0, s2 clamp
+// CHECK: [0x05,0x80,0x96,0xd2,0x80,0x04,0x00,0x00]
v_cvt_pk_u16_u32 v5, 0, s2
// CHECK: [0x05,0x00,0x97,0xd2,0x80,0x04,0x00,0x00]
@@ -46082,9 +47880,15 @@ v_cmp_f_f16_e64 tma, 0, s2
v_cmp_f_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x20,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_f_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_f_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x20,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_f_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_f_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x20,0xd0,0x01,0x05,0x00,0x00]
@@ -46133,11 +47937,14 @@ v_cmp_f_f16_e64 s[10:11], 0, exec_hi
v_cmp_f_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_f_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_f_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_f_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_f_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xee,0x01,0x00]
v_cmp_f_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x02,0x00]
@@ -46145,9 +47952,15 @@ v_cmp_f_f16_e64 s[10:11], 0, v2
v_cmp_f_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_f_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_f_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_f_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x20,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_f_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x20,0xd0,0x80,0x04,0x00,0x00]
@@ -46244,9 +48057,15 @@ v_cmp_lt_f16_e64 tma, 0, s2
v_cmp_lt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x21,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x21,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x21,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x21,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x21,0xd0,0x01,0x05,0x00,0x00]
@@ -46295,11 +48114,14 @@ v_cmp_lt_f16_e64 s[10:11], 0, exec_hi
v_cmp_lt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x04,0x02,0x00]
@@ -46307,9 +48129,15 @@ v_cmp_lt_f16_e64 s[10:11], 0, v2
v_cmp_lt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x21,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x21,0xd0,0x80,0x04,0x00,0x00]
@@ -46406,9 +48234,15 @@ v_cmp_eq_f16_e64 tma, 0, s2
v_cmp_eq_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x22,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_eq_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_eq_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x22,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_eq_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_eq_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x22,0xd0,0x01,0x05,0x00,0x00]
@@ -46457,11 +48291,14 @@ v_cmp_eq_f16_e64 s[10:11], 0, exec_hi
v_cmp_eq_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_eq_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_eq_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_eq_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_eq_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xee,0x01,0x00]
v_cmp_eq_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x02,0x00]
@@ -46469,9 +48306,15 @@ v_cmp_eq_f16_e64 s[10:11], 0, v2
v_cmp_eq_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_eq_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_eq_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_eq_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x22,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_eq_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x22,0xd0,0x80,0x04,0x00,0x00]
@@ -46568,9 +48411,15 @@ v_cmp_le_f16_e64 tma, 0, s2
v_cmp_le_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x23,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_le_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x23,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_le_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x23,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_le_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x23,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_le_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x23,0xd0,0x01,0x05,0x00,0x00]
@@ -46619,11 +48468,14 @@ v_cmp_le_f16_e64 s[10:11], 0, exec_hi
v_cmp_le_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_le_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_le_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_le_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_le_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0xee,0x01,0x00]
v_cmp_le_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x04,0x02,0x00]
@@ -46631,9 +48483,15 @@ v_cmp_le_f16_e64 s[10:11], 0, v2
v_cmp_le_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_le_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_le_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_le_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x23,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_le_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x23,0xd0,0x80,0x04,0x00,0x00]
@@ -46730,9 +48588,15 @@ v_cmp_gt_f16_e64 tma, 0, s2
v_cmp_gt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x24,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_gt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_gt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x24,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_gt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_gt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x24,0xd0,0x01,0x05,0x00,0x00]
@@ -46781,11 +48645,14 @@ v_cmp_gt_f16_e64 s[10:11], 0, exec_hi
v_cmp_gt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_gt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_gt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_gt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_gt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xee,0x01,0x00]
v_cmp_gt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x02,0x00]
@@ -46793,9 +48660,15 @@ v_cmp_gt_f16_e64 s[10:11], 0, v2
v_cmp_gt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_gt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_gt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_gt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x24,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_gt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x24,0xd0,0x80,0x04,0x00,0x00]
@@ -46892,9 +48765,15 @@ v_cmp_lg_f16_e64 tma, 0, s2
v_cmp_lg_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x25,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lg_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x25,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lg_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x25,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lg_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x25,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lg_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x25,0xd0,0x01,0x05,0x00,0x00]
@@ -46943,11 +48822,14 @@ v_cmp_lg_f16_e64 s[10:11], 0, exec_hi
v_cmp_lg_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lg_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lg_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lg_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lg_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lg_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x04,0x02,0x00]
@@ -46955,9 +48837,15 @@ v_cmp_lg_f16_e64 s[10:11], 0, v2
v_cmp_lg_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lg_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lg_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lg_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x25,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lg_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x25,0xd0,0x80,0x04,0x00,0x00]
@@ -47054,9 +48942,15 @@ v_cmp_ge_f16_e64 tma, 0, s2
v_cmp_ge_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x26,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ge_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ge_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x26,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ge_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ge_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x26,0xd0,0x01,0x05,0x00,0x00]
@@ -47105,11 +48999,14 @@ v_cmp_ge_f16_e64 s[10:11], 0, exec_hi
v_cmp_ge_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ge_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ge_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ge_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ge_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ge_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x02,0x00]
@@ -47117,9 +49014,15 @@ v_cmp_ge_f16_e64 s[10:11], 0, v2
v_cmp_ge_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ge_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ge_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ge_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x26,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ge_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x26,0xd0,0x80,0x04,0x00,0x00]
@@ -47216,9 +49119,15 @@ v_cmp_o_f16_e64 tma, 0, s2
v_cmp_o_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x27,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_o_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x27,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_o_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x27,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_o_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x27,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_o_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x27,0xd0,0x01,0x05,0x00,0x00]
@@ -47267,11 +49176,14 @@ v_cmp_o_f16_e64 s[10:11], 0, exec_hi
v_cmp_o_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_o_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_o_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_o_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_o_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0xee,0x01,0x00]
v_cmp_o_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x04,0x02,0x00]
@@ -47279,9 +49191,15 @@ v_cmp_o_f16_e64 s[10:11], 0, v2
v_cmp_o_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_o_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_o_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_o_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x27,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_o_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x27,0xd0,0x80,0x04,0x00,0x00]
@@ -47378,9 +49296,15 @@ v_cmp_u_f16_e64 tma, 0, s2
v_cmp_u_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x28,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_u_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_u_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x28,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_u_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_u_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x28,0xd0,0x01,0x05,0x00,0x00]
@@ -47429,11 +49353,14 @@ v_cmp_u_f16_e64 s[10:11], 0, exec_hi
v_cmp_u_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_u_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_u_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_u_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_u_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xee,0x01,0x00]
v_cmp_u_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x02,0x00]
@@ -47441,9 +49368,15 @@ v_cmp_u_f16_e64 s[10:11], 0, v2
v_cmp_u_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_u_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_u_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_u_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x28,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_u_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x28,0xd0,0x80,0x04,0x00,0x00]
@@ -47540,9 +49473,15 @@ v_cmp_nge_f16_e64 tma, 0, s2
v_cmp_nge_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x29,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nge_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x29,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nge_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x29,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nge_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x29,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nge_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x29,0xd0,0x01,0x05,0x00,0x00]
@@ -47591,11 +49530,14 @@ v_cmp_nge_f16_e64 s[10:11], 0, exec_hi
v_cmp_nge_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nge_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nge_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nge_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nge_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nge_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x04,0x02,0x00]
@@ -47603,9 +49545,15 @@ v_cmp_nge_f16_e64 s[10:11], 0, v2
v_cmp_nge_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nge_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nge_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nge_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x29,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nge_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x29,0xd0,0x80,0x04,0x00,0x00]
@@ -47702,9 +49650,15 @@ v_cmp_nlg_f16_e64 tma, 0, s2
v_cmp_nlg_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlg_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlg_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlg_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlg_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x01,0x05,0x00,0x00]
@@ -47753,11 +49707,14 @@ v_cmp_nlg_f16_e64 s[10:11], 0, exec_hi
v_cmp_nlg_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlg_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlg_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlg_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlg_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlg_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x02,0x00]
@@ -47765,9 +49722,15 @@ v_cmp_nlg_f16_e64 s[10:11], 0, v2
v_cmp_nlg_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlg_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlg_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlg_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlg_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2a,0xd0,0x80,0x04,0x00,0x00]
@@ -47864,9 +49827,15 @@ v_cmp_ngt_f16_e64 tma, 0, s2
v_cmp_ngt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2b,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ngt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2b,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ngt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2b,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ngt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2b,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ngt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2b,0xd0,0x01,0x05,0x00,0x00]
@@ -47915,11 +49884,14 @@ v_cmp_ngt_f16_e64 s[10:11], 0, exec_hi
v_cmp_ngt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ngt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ngt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ngt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ngt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ngt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x04,0x02,0x00]
@@ -47927,9 +49899,15 @@ v_cmp_ngt_f16_e64 s[10:11], 0, v2
v_cmp_ngt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ngt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ngt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ngt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2b,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ngt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2b,0xd0,0x80,0x04,0x00,0x00]
@@ -48026,9 +50004,15 @@ v_cmp_nle_f16_e64 tma, 0, s2
v_cmp_nle_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nle_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nle_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nle_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nle_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x01,0x05,0x00,0x00]
@@ -48077,11 +50061,14 @@ v_cmp_nle_f16_e64 s[10:11], 0, exec_hi
v_cmp_nle_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nle_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nle_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nle_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nle_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nle_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x02,0x00]
@@ -48089,9 +50076,15 @@ v_cmp_nle_f16_e64 s[10:11], 0, v2
v_cmp_nle_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nle_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nle_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nle_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nle_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2c,0xd0,0x80,0x04,0x00,0x00]
@@ -48188,9 +50181,15 @@ v_cmp_neq_f16_e64 tma, 0, s2
v_cmp_neq_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2d,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_neq_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2d,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_neq_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2d,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_neq_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2d,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_neq_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2d,0xd0,0x01,0x05,0x00,0x00]
@@ -48239,11 +50238,14 @@ v_cmp_neq_f16_e64 s[10:11], 0, exec_hi
v_cmp_neq_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_neq_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_neq_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_neq_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_neq_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0xee,0x01,0x00]
v_cmp_neq_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x04,0x02,0x00]
@@ -48251,9 +50253,15 @@ v_cmp_neq_f16_e64 s[10:11], 0, v2
v_cmp_neq_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_neq_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_neq_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_neq_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2d,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_neq_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2d,0xd0,0x80,0x04,0x00,0x00]
@@ -48350,9 +50358,15 @@ v_cmp_nlt_f16_e64 tma, 0, s2
v_cmp_nlt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x01,0x05,0x00,0x00]
@@ -48401,11 +50415,14 @@ v_cmp_nlt_f16_e64 s[10:11], 0, exec_hi
v_cmp_nlt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x02,0x00]
@@ -48413,9 +50430,15 @@ v_cmp_nlt_f16_e64 s[10:11], 0, v2
v_cmp_nlt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2e,0xd0,0x80,0x04,0x00,0x00]
@@ -48512,9 +50535,15 @@ v_cmp_tru_f16_e64 tma, 0, s2
v_cmp_tru_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x2f,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_tru_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x2f,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_tru_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x2f,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_tru_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x2f,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_tru_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x2f,0xd0,0x01,0x05,0x00,0x00]
@@ -48563,11 +50592,14 @@ v_cmp_tru_f16_e64 s[10:11], 0, exec_hi
v_cmp_tru_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_tru_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_tru_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_tru_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_tru_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0xee,0x01,0x00]
v_cmp_tru_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x04,0x02,0x00]
@@ -48575,9 +50607,15 @@ v_cmp_tru_f16_e64 s[10:11], 0, v2
v_cmp_tru_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_tru_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_tru_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_tru_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x2f,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_tru_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x2f,0xd0,0x80,0x04,0x00,0x00]
@@ -48674,9 +50712,15 @@ v_cmpx_f_f16_e64 tma, 0, s2
v_cmpx_f_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x30,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_f_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_f_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x30,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_f_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_f_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x30,0xd0,0x01,0x05,0x00,0x00]
@@ -48725,11 +50769,14 @@ v_cmpx_f_f16_e64 s[10:11], 0, exec_hi
v_cmpx_f_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_f_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_f_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_f_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_f_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_f_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x02,0x00]
@@ -48737,9 +50784,15 @@ v_cmpx_f_f16_e64 s[10:11], 0, v2
v_cmpx_f_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_f_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_f_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_f_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x30,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_f_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x30,0xd0,0x80,0x04,0x00,0x00]
@@ -48836,9 +50889,15 @@ v_cmpx_lt_f16_e64 tma, 0, s2
v_cmpx_lt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x31,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x31,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x31,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x31,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x31,0xd0,0x01,0x05,0x00,0x00]
@@ -48887,11 +50946,14 @@ v_cmpx_lt_f16_e64 s[10:11], 0, exec_hi
v_cmpx_lt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x04,0x02,0x00]
@@ -48899,9 +50961,15 @@ v_cmpx_lt_f16_e64 s[10:11], 0, v2
v_cmpx_lt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x31,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x31,0xd0,0x80,0x04,0x00,0x00]
@@ -48998,9 +51066,15 @@ v_cmpx_eq_f16_e64 tma, 0, s2
v_cmpx_eq_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x32,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_eq_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_eq_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x32,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_eq_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_eq_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x32,0xd0,0x01,0x05,0x00,0x00]
@@ -49049,11 +51123,14 @@ v_cmpx_eq_f16_e64 s[10:11], 0, exec_hi
v_cmpx_eq_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_eq_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_eq_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_eq_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_eq_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_eq_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x02,0x00]
@@ -49061,9 +51138,15 @@ v_cmpx_eq_f16_e64 s[10:11], 0, v2
v_cmpx_eq_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_eq_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_eq_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_eq_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x32,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_eq_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x32,0xd0,0x80,0x04,0x00,0x00]
@@ -49160,9 +51243,15 @@ v_cmpx_le_f16_e64 tma, 0, s2
v_cmpx_le_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x33,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_le_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x33,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_le_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x33,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_le_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x33,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_le_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x33,0xd0,0x01,0x05,0x00,0x00]
@@ -49211,11 +51300,14 @@ v_cmpx_le_f16_e64 s[10:11], 0, exec_hi
v_cmpx_le_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_le_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_le_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_le_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_le_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_le_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x04,0x02,0x00]
@@ -49223,9 +51315,15 @@ v_cmpx_le_f16_e64 s[10:11], 0, v2
v_cmpx_le_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_le_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_le_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_le_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x33,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_le_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x33,0xd0,0x80,0x04,0x00,0x00]
@@ -49322,9 +51420,15 @@ v_cmpx_gt_f16_e64 tma, 0, s2
v_cmpx_gt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x34,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_gt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_gt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x34,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_gt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_gt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x34,0xd0,0x01,0x05,0x00,0x00]
@@ -49373,11 +51477,14 @@ v_cmpx_gt_f16_e64 s[10:11], 0, exec_hi
v_cmpx_gt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_gt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_gt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_gt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_gt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_gt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x02,0x00]
@@ -49385,9 +51492,15 @@ v_cmpx_gt_f16_e64 s[10:11], 0, v2
v_cmpx_gt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_gt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_gt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_gt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x34,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_gt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x34,0xd0,0x80,0x04,0x00,0x00]
@@ -49484,9 +51597,15 @@ v_cmpx_lg_f16_e64 tma, 0, s2
v_cmpx_lg_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x35,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lg_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x35,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lg_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x35,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lg_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x35,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lg_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x35,0xd0,0x01,0x05,0x00,0x00]
@@ -49535,11 +51654,14 @@ v_cmpx_lg_f16_e64 s[10:11], 0, exec_hi
v_cmpx_lg_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lg_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lg_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lg_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lg_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lg_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x04,0x02,0x00]
@@ -49547,9 +51669,15 @@ v_cmpx_lg_f16_e64 s[10:11], 0, v2
v_cmpx_lg_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lg_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lg_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lg_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x35,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lg_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x35,0xd0,0x80,0x04,0x00,0x00]
@@ -49646,9 +51774,15 @@ v_cmpx_ge_f16_e64 tma, 0, s2
v_cmpx_ge_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x36,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ge_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ge_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x36,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ge_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ge_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x36,0xd0,0x01,0x05,0x00,0x00]
@@ -49697,11 +51831,14 @@ v_cmpx_ge_f16_e64 s[10:11], 0, exec_hi
v_cmpx_ge_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ge_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ge_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ge_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ge_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ge_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x02,0x00]
@@ -49709,9 +51846,15 @@ v_cmpx_ge_f16_e64 s[10:11], 0, v2
v_cmpx_ge_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ge_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ge_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ge_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x36,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ge_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x36,0xd0,0x80,0x04,0x00,0x00]
@@ -49808,9 +51951,15 @@ v_cmpx_o_f16_e64 tma, 0, s2
v_cmpx_o_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x37,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_o_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x37,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_o_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x37,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_o_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x37,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_o_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x37,0xd0,0x01,0x05,0x00,0x00]
@@ -49859,11 +52008,14 @@ v_cmpx_o_f16_e64 s[10:11], 0, exec_hi
v_cmpx_o_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_o_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_o_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_o_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_o_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_o_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x04,0x02,0x00]
@@ -49871,9 +52023,15 @@ v_cmpx_o_f16_e64 s[10:11], 0, v2
v_cmpx_o_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_o_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_o_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_o_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x37,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_o_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x37,0xd0,0x80,0x04,0x00,0x00]
@@ -49970,9 +52128,15 @@ v_cmpx_u_f16_e64 tma, 0, s2
v_cmpx_u_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x38,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_u_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_u_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x38,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_u_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_u_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x38,0xd0,0x01,0x05,0x00,0x00]
@@ -50021,11 +52185,14 @@ v_cmpx_u_f16_e64 s[10:11], 0, exec_hi
v_cmpx_u_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_u_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_u_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_u_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_u_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_u_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x02,0x00]
@@ -50033,9 +52200,15 @@ v_cmpx_u_f16_e64 s[10:11], 0, v2
v_cmpx_u_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_u_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_u_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_u_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x38,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_u_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x38,0xd0,0x80,0x04,0x00,0x00]
@@ -50132,9 +52305,15 @@ v_cmpx_nge_f16_e64 tma, 0, s2
v_cmpx_nge_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x39,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nge_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x39,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nge_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x39,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nge_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x39,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nge_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x39,0xd0,0x01,0x05,0x00,0x00]
@@ -50183,11 +52362,14 @@ v_cmpx_nge_f16_e64 s[10:11], 0, exec_hi
v_cmpx_nge_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nge_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nge_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nge_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nge_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nge_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x04,0x02,0x00]
@@ -50195,9 +52377,15 @@ v_cmpx_nge_f16_e64 s[10:11], 0, v2
v_cmpx_nge_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nge_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nge_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nge_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x39,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nge_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x39,0xd0,0x80,0x04,0x00,0x00]
@@ -50294,9 +52482,15 @@ v_cmpx_nlg_f16_e64 tma, 0, s2
v_cmpx_nlg_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlg_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlg_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlg_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlg_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x01,0x05,0x00,0x00]
@@ -50345,11 +52539,14 @@ v_cmpx_nlg_f16_e64 s[10:11], 0, exec_hi
v_cmpx_nlg_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlg_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlg_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlg_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlg_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlg_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x02,0x00]
@@ -50357,9 +52554,15 @@ v_cmpx_nlg_f16_e64 s[10:11], 0, v2
v_cmpx_nlg_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlg_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlg_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlg_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlg_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3a,0xd0,0x80,0x04,0x00,0x00]
@@ -50456,9 +52659,15 @@ v_cmpx_ngt_f16_e64 tma, 0, s2
v_cmpx_ngt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3b,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ngt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3b,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ngt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3b,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ngt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3b,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ngt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3b,0xd0,0x01,0x05,0x00,0x00]
@@ -50507,11 +52716,14 @@ v_cmpx_ngt_f16_e64 s[10:11], 0, exec_hi
v_cmpx_ngt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ngt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ngt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ngt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ngt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ngt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x04,0x02,0x00]
@@ -50519,9 +52731,15 @@ v_cmpx_ngt_f16_e64 s[10:11], 0, v2
v_cmpx_ngt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ngt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ngt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ngt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3b,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ngt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3b,0xd0,0x80,0x04,0x00,0x00]
@@ -50618,9 +52836,15 @@ v_cmpx_nle_f16_e64 tma, 0, s2
v_cmpx_nle_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nle_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nle_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nle_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nle_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x01,0x05,0x00,0x00]
@@ -50669,11 +52893,14 @@ v_cmpx_nle_f16_e64 s[10:11], 0, exec_hi
v_cmpx_nle_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nle_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nle_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nle_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nle_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nle_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x02,0x00]
@@ -50681,9 +52908,15 @@ v_cmpx_nle_f16_e64 s[10:11], 0, v2
v_cmpx_nle_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nle_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nle_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nle_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nle_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3c,0xd0,0x80,0x04,0x00,0x00]
@@ -50780,9 +53013,15 @@ v_cmpx_neq_f16_e64 tma, 0, s2
v_cmpx_neq_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3d,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_neq_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3d,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_neq_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3d,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_neq_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3d,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_neq_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3d,0xd0,0x01,0x05,0x00,0x00]
@@ -50831,11 +53070,14 @@ v_cmpx_neq_f16_e64 s[10:11], 0, exec_hi
v_cmpx_neq_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_neq_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_neq_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_neq_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_neq_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_neq_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x04,0x02,0x00]
@@ -50843,9 +53085,15 @@ v_cmpx_neq_f16_e64 s[10:11], 0, v2
v_cmpx_neq_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_neq_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_neq_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_neq_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3d,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_neq_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3d,0xd0,0x80,0x04,0x00,0x00]
@@ -50942,9 +53190,15 @@ v_cmpx_nlt_f16_e64 tma, 0, s2
v_cmpx_nlt_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlt_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlt_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlt_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlt_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x01,0x05,0x00,0x00]
@@ -50993,11 +53247,14 @@ v_cmpx_nlt_f16_e64 s[10:11], 0, exec_hi
v_cmpx_nlt_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlt_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlt_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlt_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlt_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlt_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x02,0x00]
@@ -51005,9 +53262,15 @@ v_cmpx_nlt_f16_e64 s[10:11], 0, v2
v_cmpx_nlt_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlt_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlt_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlt_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlt_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3e,0xd0,0x80,0x04,0x00,0x00]
@@ -51104,9 +53367,15 @@ v_cmpx_tru_f16_e64 tma, 0, s2
v_cmpx_tru_f16_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x3f,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_tru_f16_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x3f,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_tru_f16_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x3f,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_tru_f16_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x3f,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_tru_f16_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x3f,0xd0,0x01,0x05,0x00,0x00]
@@ -51155,11 +53424,14 @@ v_cmpx_tru_f16_e64 s[10:11], 0, exec_hi
v_cmpx_tru_f16_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_tru_f16_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_tru_f16_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_tru_f16_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_tru_f16_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_tru_f16_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x04,0x02,0x00]
@@ -51167,9 +53439,15 @@ v_cmpx_tru_f16_e64 s[10:11], 0, v2
v_cmpx_tru_f16_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_tru_f16_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_tru_f16_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_tru_f16_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x3f,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_tru_f16_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x3f,0xd0,0x80,0x04,0x00,0x00]
@@ -51266,9 +53544,15 @@ v_cmp_f_f32_e64 tma, 0, s2
v_cmp_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x40,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x40,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x40,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x40,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x40,0xd0,0x01,0x05,0x00,0x00]
@@ -51317,11 +53601,14 @@ v_cmp_f_f32_e64 s[10:11], 0, exec_hi
v_cmp_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0xee,0x01,0x00]
v_cmp_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x04,0x02,0x00]
@@ -51329,9 +53616,15 @@ v_cmp_f_f32_e64 s[10:11], 0, v2
v_cmp_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x40,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_f_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x40,0xd0,0x80,0x04,0x00,0x00]
@@ -51428,9 +53721,15 @@ v_cmp_lt_f32_e64 tma, 0, s2
v_cmp_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x41,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x41,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x41,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x41,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x41,0xd0,0x01,0x05,0x00,0x00]
@@ -51479,11 +53778,14 @@ v_cmp_lt_f32_e64 s[10:11], 0, exec_hi
v_cmp_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x04,0x02,0x00]
@@ -51491,9 +53793,15 @@ v_cmp_lt_f32_e64 s[10:11], 0, v2
v_cmp_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x41,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x41,0xd0,0x80,0x04,0x00,0x00]
@@ -51590,9 +53898,15 @@ v_cmp_eq_f32_e64 tma, 0, s2
v_cmp_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x42,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x42,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x42,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x42,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x42,0xd0,0x01,0x05,0x00,0x00]
@@ -51641,11 +53955,14 @@ v_cmp_eq_f32_e64 s[10:11], 0, exec_hi
v_cmp_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0xee,0x01,0x00]
v_cmp_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x04,0x02,0x00]
@@ -51653,9 +53970,15 @@ v_cmp_eq_f32_e64 s[10:11], 0, v2
v_cmp_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x42,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_eq_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x42,0xd0,0x80,0x04,0x00,0x00]
@@ -51752,9 +54075,15 @@ v_cmp_le_f32_e64 tma, 0, s2
v_cmp_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x43,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x43,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x43,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x43,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x43,0xd0,0x01,0x05,0x00,0x00]
@@ -51803,11 +54132,14 @@ v_cmp_le_f32_e64 s[10:11], 0, exec_hi
v_cmp_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0xee,0x01,0x00]
v_cmp_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x04,0x02,0x00]
@@ -51815,9 +54147,15 @@ v_cmp_le_f32_e64 s[10:11], 0, v2
v_cmp_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x43,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_le_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x43,0xd0,0x80,0x04,0x00,0x00]
@@ -51914,9 +54252,15 @@ v_cmp_gt_f32_e64 tma, 0, s2
v_cmp_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x44,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x44,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x44,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x44,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x44,0xd0,0x01,0x05,0x00,0x00]
@@ -51965,11 +54309,14 @@ v_cmp_gt_f32_e64 s[10:11], 0, exec_hi
v_cmp_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0xee,0x01,0x00]
v_cmp_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x04,0x02,0x00]
@@ -51977,9 +54324,15 @@ v_cmp_gt_f32_e64 s[10:11], 0, v2
v_cmp_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x44,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_gt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x44,0xd0,0x80,0x04,0x00,0x00]
@@ -52076,9 +54429,15 @@ v_cmp_lg_f32_e64 tma, 0, s2
v_cmp_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x45,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x45,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x45,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x45,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x45,0xd0,0x01,0x05,0x00,0x00]
@@ -52127,11 +54486,14 @@ v_cmp_lg_f32_e64 s[10:11], 0, exec_hi
v_cmp_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0xee,0x01,0x00]
v_cmp_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x04,0x02,0x00]
@@ -52139,9 +54501,15 @@ v_cmp_lg_f32_e64 s[10:11], 0, v2
v_cmp_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x45,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_lg_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x45,0xd0,0x80,0x04,0x00,0x00]
@@ -52238,9 +54606,15 @@ v_cmp_ge_f32_e64 tma, 0, s2
v_cmp_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x46,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x46,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x46,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x46,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x46,0xd0,0x01,0x05,0x00,0x00]
@@ -52289,11 +54663,14 @@ v_cmp_ge_f32_e64 s[10:11], 0, exec_hi
v_cmp_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x04,0x02,0x00]
@@ -52301,9 +54678,15 @@ v_cmp_ge_f32_e64 s[10:11], 0, v2
v_cmp_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x46,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ge_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x46,0xd0,0x80,0x04,0x00,0x00]
@@ -52400,9 +54783,15 @@ v_cmp_o_f32_e64 tma, 0, s2
v_cmp_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x47,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x47,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x47,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x47,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x47,0xd0,0x01,0x05,0x00,0x00]
@@ -52451,11 +54840,14 @@ v_cmp_o_f32_e64 s[10:11], 0, exec_hi
v_cmp_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0xee,0x01,0x00]
v_cmp_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x04,0x02,0x00]
@@ -52463,9 +54855,15 @@ v_cmp_o_f32_e64 s[10:11], 0, v2
v_cmp_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x47,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_o_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x47,0xd0,0x80,0x04,0x00,0x00]
@@ -52562,9 +54960,15 @@ v_cmp_u_f32_e64 tma, 0, s2
v_cmp_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x48,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x48,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x48,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x48,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x48,0xd0,0x01,0x05,0x00,0x00]
@@ -52613,11 +55017,14 @@ v_cmp_u_f32_e64 s[10:11], 0, exec_hi
v_cmp_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0xee,0x01,0x00]
v_cmp_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x04,0x02,0x00]
@@ -52625,9 +55032,15 @@ v_cmp_u_f32_e64 s[10:11], 0, v2
v_cmp_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x48,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_u_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x48,0xd0,0x80,0x04,0x00,0x00]
@@ -52724,9 +55137,15 @@ v_cmp_nge_f32_e64 tma, 0, s2
v_cmp_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x49,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x49,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x49,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x49,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x49,0xd0,0x01,0x05,0x00,0x00]
@@ -52775,11 +55194,14 @@ v_cmp_nge_f32_e64 s[10:11], 0, exec_hi
v_cmp_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x04,0x02,0x00]
@@ -52787,9 +55209,15 @@ v_cmp_nge_f32_e64 s[10:11], 0, v2
v_cmp_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x49,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nge_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x49,0xd0,0x80,0x04,0x00,0x00]
@@ -52886,9 +55314,15 @@ v_cmp_nlg_f32_e64 tma, 0, s2
v_cmp_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4a,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4a,0xd0,0x01,0x05,0x00,0x00]
@@ -52937,11 +55371,14 @@ v_cmp_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmp_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x04,0x02,0x00]
@@ -52949,9 +55386,15 @@ v_cmp_nlg_f32_e64 s[10:11], 0, v2
v_cmp_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlg_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4a,0xd0,0x80,0x04,0x00,0x00]
@@ -53048,9 +55491,15 @@ v_cmp_ngt_f32_e64 tma, 0, s2
v_cmp_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4b,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4b,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4b,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4b,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4b,0xd0,0x01,0x05,0x00,0x00]
@@ -53099,11 +55548,14 @@ v_cmp_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmp_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0xee,0x01,0x00]
v_cmp_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x04,0x02,0x00]
@@ -53111,9 +55563,15 @@ v_cmp_ngt_f32_e64 s[10:11], 0, v2
v_cmp_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4b,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_ngt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4b,0xd0,0x80,0x04,0x00,0x00]
@@ -53210,9 +55668,15 @@ v_cmp_nle_f32_e64 tma, 0, s2
v_cmp_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4c,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4c,0xd0,0x01,0x05,0x00,0x00]
@@ -53261,11 +55725,14 @@ v_cmp_nle_f32_e64 s[10:11], 0, exec_hi
v_cmp_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x04,0x02,0x00]
@@ -53273,9 +55740,15 @@ v_cmp_nle_f32_e64 s[10:11], 0, v2
v_cmp_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nle_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4c,0xd0,0x80,0x04,0x00,0x00]
@@ -53372,9 +55845,15 @@ v_cmp_neq_f32_e64 tma, 0, s2
v_cmp_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4d,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4d,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4d,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4d,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4d,0xd0,0x01,0x05,0x00,0x00]
@@ -53423,11 +55902,14 @@ v_cmp_neq_f32_e64 s[10:11], 0, exec_hi
v_cmp_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0xee,0x01,0x00]
v_cmp_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x04,0x02,0x00]
@@ -53435,9 +55917,15 @@ v_cmp_neq_f32_e64 s[10:11], 0, v2
v_cmp_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4d,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_neq_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4d,0xd0,0x80,0x04,0x00,0x00]
@@ -53534,9 +56022,15 @@ v_cmp_nlt_f32_e64 tma, 0, s2
v_cmp_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4e,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4e,0xd0,0x01,0x05,0x00,0x00]
@@ -53585,11 +56079,14 @@ v_cmp_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmp_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0xee,0x01,0x00]
v_cmp_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x04,0x02,0x00]
@@ -53597,9 +56094,15 @@ v_cmp_nlt_f32_e64 s[10:11], 0, v2
v_cmp_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_nlt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4e,0xd0,0x80,0x04,0x00,0x00]
@@ -53696,9 +56199,15 @@ v_cmp_tru_f32_e64 tma, 0, s2
v_cmp_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x4f,0xd0,0x80,0x04,0x00,0x00]
+v_cmp_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x4f,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmp_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x4f,0xd0,0xf0,0x04,0x00,0x00]
+v_cmp_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x4f,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmp_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x4f,0xd0,0x01,0x05,0x00,0x00]
@@ -53747,11 +56256,14 @@ v_cmp_tru_f32_e64 s[10:11], 0, exec_hi
v_cmp_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x00,0x01,0x00]
+v_cmp_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x82,0x01,0x00]
+
v_cmp_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0xe0,0x01,0x00]
-v_cmp_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0xfa,0x01,0x00]
+v_cmp_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0xee,0x01,0x00]
v_cmp_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x04,0x02,0x00]
@@ -53759,9 +56271,15 @@ v_cmp_tru_f32_e64 s[10:11], 0, v2
v_cmp_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0xfe,0x03,0x00]
+v_cmp_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x04,0x00,0x20]
+
v_cmp_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x04,0x00,0x40]
+v_cmp_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x4f,0xd0,0x80,0x04,0x00,0x60]
+
v_cmp_tru_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x4f,0xd0,0x80,0x04,0x00,0x00]
@@ -53858,9 +56376,15 @@ v_cmpx_f_f32_e64 tma, 0, s2
v_cmpx_f_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x50,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_f_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x50,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_f_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x50,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_f_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x50,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_f_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x50,0xd0,0x01,0x05,0x00,0x00]
@@ -53909,11 +56433,14 @@ v_cmpx_f_f32_e64 s[10:11], 0, exec_hi
v_cmpx_f_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_f_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_f_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_f_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_f_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_f_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x04,0x02,0x00]
@@ -53921,9 +56448,15 @@ v_cmpx_f_f32_e64 s[10:11], 0, v2
v_cmpx_f_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_f_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_f_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_f_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x50,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_f_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x50,0xd0,0x80,0x04,0x00,0x00]
@@ -54020,9 +56553,15 @@ v_cmpx_lt_f32_e64 tma, 0, s2
v_cmpx_lt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x51,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x51,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x51,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x51,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x51,0xd0,0x01,0x05,0x00,0x00]
@@ -54071,11 +56610,14 @@ v_cmpx_lt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_lt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x04,0x02,0x00]
@@ -54083,9 +56625,15 @@ v_cmpx_lt_f32_e64 s[10:11], 0, v2
v_cmpx_lt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x51,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x51,0xd0,0x80,0x04,0x00,0x00]
@@ -54182,9 +56730,15 @@ v_cmpx_eq_f32_e64 tma, 0, s2
v_cmpx_eq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x52,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_eq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x52,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x52,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_eq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x52,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x52,0xd0,0x01,0x05,0x00,0x00]
@@ -54233,11 +56787,14 @@ v_cmpx_eq_f32_e64 s[10:11], 0, exec_hi
v_cmpx_eq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_eq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_eq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_eq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_eq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_eq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x04,0x02,0x00]
@@ -54245,9 +56802,15 @@ v_cmpx_eq_f32_e64 s[10:11], 0, v2
v_cmpx_eq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_eq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_eq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_eq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x52,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_eq_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x52,0xd0,0x80,0x04,0x00,0x00]
@@ -54344,9 +56907,15 @@ v_cmpx_le_f32_e64 tma, 0, s2
v_cmpx_le_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x53,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_le_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x53,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_le_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x53,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_le_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x53,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_le_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x53,0xd0,0x01,0x05,0x00,0x00]
@@ -54395,11 +56964,14 @@ v_cmpx_le_f32_e64 s[10:11], 0, exec_hi
v_cmpx_le_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_le_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_le_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_le_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_le_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_le_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x04,0x02,0x00]
@@ -54407,9 +56979,15 @@ v_cmpx_le_f32_e64 s[10:11], 0, v2
v_cmpx_le_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_le_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_le_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_le_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x53,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_le_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x53,0xd0,0x80,0x04,0x00,0x00]
@@ -54506,9 +57084,15 @@ v_cmpx_gt_f32_e64 tma, 0, s2
v_cmpx_gt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x54,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_gt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x54,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x54,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_gt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x54,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x54,0xd0,0x01,0x05,0x00,0x00]
@@ -54557,11 +57141,14 @@ v_cmpx_gt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_gt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_gt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_gt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_gt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_gt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_gt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x04,0x02,0x00]
@@ -54569,9 +57156,15 @@ v_cmpx_gt_f32_e64 s[10:11], 0, v2
v_cmpx_gt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_gt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_gt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_gt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x54,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_gt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x54,0xd0,0x80,0x04,0x00,0x00]
@@ -54668,9 +57261,15 @@ v_cmpx_lg_f32_e64 tma, 0, s2
v_cmpx_lg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x55,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_lg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x55,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x55,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_lg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x55,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x55,0xd0,0x01,0x05,0x00,0x00]
@@ -54719,11 +57318,14 @@ v_cmpx_lg_f32_e64 s[10:11], 0, exec_hi
v_cmpx_lg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_lg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_lg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_lg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_lg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_lg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x04,0x02,0x00]
@@ -54731,9 +57333,15 @@ v_cmpx_lg_f32_e64 s[10:11], 0, v2
v_cmpx_lg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_lg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_lg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_lg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x55,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_lg_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x55,0xd0,0x80,0x04,0x00,0x00]
@@ -54830,9 +57438,15 @@ v_cmpx_ge_f32_e64 tma, 0, s2
v_cmpx_ge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x56,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x56,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x56,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x56,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x56,0xd0,0x01,0x05,0x00,0x00]
@@ -54881,11 +57495,14 @@ v_cmpx_ge_f32_e64 s[10:11], 0, exec_hi
v_cmpx_ge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x04,0x02,0x00]
@@ -54893,9 +57510,15 @@ v_cmpx_ge_f32_e64 s[10:11], 0, v2
v_cmpx_ge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x56,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ge_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x56,0xd0,0x80,0x04,0x00,0x00]
@@ -54992,9 +57615,15 @@ v_cmpx_o_f32_e64 tma, 0, s2
v_cmpx_o_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x57,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_o_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x57,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_o_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x57,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_o_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x57,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_o_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x57,0xd0,0x01,0x05,0x00,0x00]
@@ -55043,11 +57672,14 @@ v_cmpx_o_f32_e64 s[10:11], 0, exec_hi
v_cmpx_o_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_o_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_o_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_o_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_o_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_o_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x04,0x02,0x00]
@@ -55055,9 +57687,15 @@ v_cmpx_o_f32_e64 s[10:11], 0, v2
v_cmpx_o_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_o_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_o_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_o_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x57,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_o_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x57,0xd0,0x80,0x04,0x00,0x00]
@@ -55154,9 +57792,15 @@ v_cmpx_u_f32_e64 tma, 0, s2
v_cmpx_u_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x58,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_u_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x58,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_u_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x58,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_u_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x58,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_u_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x58,0xd0,0x01,0x05,0x00,0x00]
@@ -55205,11 +57849,14 @@ v_cmpx_u_f32_e64 s[10:11], 0, exec_hi
v_cmpx_u_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_u_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_u_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_u_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_u_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_u_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x04,0x02,0x00]
@@ -55217,9 +57864,15 @@ v_cmpx_u_f32_e64 s[10:11], 0, v2
v_cmpx_u_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_u_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_u_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_u_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x58,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_u_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x58,0xd0,0x80,0x04,0x00,0x00]
@@ -55316,9 +57969,15 @@ v_cmpx_nge_f32_e64 tma, 0, s2
v_cmpx_nge_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x59,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nge_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x59,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x59,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nge_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x59,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x59,0xd0,0x01,0x05,0x00,0x00]
@@ -55367,11 +58026,14 @@ v_cmpx_nge_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nge_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nge_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nge_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nge_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nge_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nge_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x04,0x02,0x00]
@@ -55379,9 +58041,15 @@ v_cmpx_nge_f32_e64 s[10:11], 0, v2
v_cmpx_nge_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nge_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nge_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nge_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x59,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nge_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x59,0xd0,0x80,0x04,0x00,0x00]
@@ -55478,9 +58146,15 @@ v_cmpx_nlg_f32_e64 tma, 0, s2
v_cmpx_nlg_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5a,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5a,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5a,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5a,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5a,0xd0,0x01,0x05,0x00,0x00]
@@ -55529,11 +58203,14 @@ v_cmpx_nlg_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nlg_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlg_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlg_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlg_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x04,0x02,0x00]
@@ -55541,9 +58218,15 @@ v_cmpx_nlg_f32_e64 s[10:11], 0, v2
v_cmpx_nlg_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlg_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlg_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlg_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5a,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlg_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5a,0xd0,0x80,0x04,0x00,0x00]
@@ -55640,9 +58323,15 @@ v_cmpx_ngt_f32_e64 tma, 0, s2
v_cmpx_ngt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5b,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5b,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5b,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5b,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5b,0xd0,0x01,0x05,0x00,0x00]
@@ -55691,11 +58380,14 @@ v_cmpx_ngt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_ngt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_ngt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_ngt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_ngt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x04,0x02,0x00]
@@ -55703,9 +58395,15 @@ v_cmpx_ngt_f32_e64 s[10:11], 0, v2
v_cmpx_ngt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_ngt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_ngt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_ngt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5b,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_ngt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5b,0xd0,0x80,0x04,0x00,0x00]
@@ -55802,9 +58500,15 @@ v_cmpx_nle_f32_e64 tma, 0, s2
v_cmpx_nle_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5c,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nle_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5c,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5c,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nle_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5c,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5c,0xd0,0x01,0x05,0x00,0x00]
@@ -55853,11 +58557,14 @@ v_cmpx_nle_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nle_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nle_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nle_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nle_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nle_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nle_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x04,0x02,0x00]
@@ -55865,9 +58572,15 @@ v_cmpx_nle_f32_e64 s[10:11], 0, v2
v_cmpx_nle_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nle_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nle_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nle_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5c,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nle_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5c,0xd0,0x80,0x04,0x00,0x00]
@@ -55964,9 +58677,15 @@ v_cmpx_neq_f32_e64 tma, 0, s2
v_cmpx_neq_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5d,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_neq_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5d,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5d,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_neq_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5d,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5d,0xd0,0x01,0x05,0x00,0x00]
@@ -56015,11 +58734,14 @@ v_cmpx_neq_f32_e64 s[10:11], 0, exec_hi
v_cmpx_neq_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_neq_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_neq_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_neq_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_neq_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_neq_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x04,0x02,0x00]
@@ -56027,9 +58749,15 @@ v_cmpx_neq_f32_e64 s[10:11], 0, v2
v_cmpx_neq_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_neq_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_neq_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_neq_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5d,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_neq_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5d,0xd0,0x80,0x04,0x00,0x00]
@@ -56126,9 +58854,15 @@ v_cmpx_nlt_f32_e64 tma, 0, s2
v_cmpx_nlt_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5e,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5e,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5e,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5e,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5e,0xd0,0x01,0x05,0x00,0x00]
@@ -56177,11 +58911,14 @@ v_cmpx_nlt_f32_e64 s[10:11], 0, exec_hi
v_cmpx_nlt_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_nlt_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_nlt_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_nlt_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x04,0x02,0x00]
@@ -56189,9 +58926,15 @@ v_cmpx_nlt_f32_e64 s[10:11], 0, v2
v_cmpx_nlt_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_nlt_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_nlt_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_nlt_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5e,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_nlt_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5e,0xd0,0x80,0x04,0x00,0x00]
@@ -56288,9 +59031,15 @@ v_cmpx_tru_f32_e64 tma, 0, s2
v_cmpx_tru_f32_e64 ttmp[10:11], 0, s2
// CHECK: [0x7a,0x00,0x5f,0xd0,0x80,0x04,0x00,0x00]
+v_cmpx_tru_f32_e64 s[10:11], -1, s2
+// CHECK: [0x0a,0x00,0x5f,0xd0,0xc1,0x04,0x00,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], 0.5, s2
// CHECK: [0x0a,0x00,0x5f,0xd0,0xf0,0x04,0x00,0x00]
+v_cmpx_tru_f32_e64 s[10:11], -4.0, s2
+// CHECK: [0x0a,0x00,0x5f,0xd0,0xf7,0x04,0x00,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], v1, s2
// CHECK: [0x0a,0x00,0x5f,0xd0,0x01,0x05,0x00,0x00]
@@ -56339,11 +59088,14 @@ v_cmpx_tru_f32_e64 s[10:11], 0, exec_hi
v_cmpx_tru_f32_e64 s[10:11], 0, 0
// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x00,0x01,0x00]
+v_cmpx_tru_f32_e64 s[10:11], 0, -1
+// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x82,0x01,0x00]
+
v_cmpx_tru_f32_e64 s[10:11], 0, 0.5
// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0xe0,0x01,0x00]
-v_cmpx_tru_f32_e64 s[10:11], 0, scc
-// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0xfa,0x01,0x00]
+v_cmpx_tru_f32_e64 s[10:11], 0, -4.0
+// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0xee,0x01,0x00]
v_cmpx_tru_f32_e64 s[10:11], 0, v2
// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x04,0x02,0x00]
@@ -56351,9 +59103,15 @@ v_cmpx_tru_f32_e64 s[10:11], 0, v2
v_cmpx_tru_f32_e64 s[10:11], 0, v255
// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0xfe,0x03,0x00]
+v_cmpx_tru_f32_e64 s[10:11], neg(0), s2
+// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x04,0x00,0x20]
+
v_cmpx_tru_f32_e64 s[10:11], 0, -s2
// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x04,0x00,0x40]
+v_cmpx_tru_f32_e64 s[10:11], neg(0), -s2
+// CHECK: [0x0a,0x00,0x5f,0xd0,0x80,0x04,0x00,0x60]
+
v_cmpx_tru_f32_e64 s[10:11], 0, s2 clamp
// CHECK: [0x0a,0x80,0x5f,0xd0,0x80,0x04,0x00,0x00]
@@ -56438,9 +59196,15 @@ v_cmp_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x60,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x60,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x60,0xd0,0x01,0x09,0x00,0x00]
@@ -56450,9 +59214,15 @@ v_cmp_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x60,0xd0,0x04,0x04,0x02,0x00]
@@ -56552,9 +59322,15 @@ v_cmp_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x61,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x61,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x61,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x61,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x61,0xd0,0x01,0x09,0x00,0x00]
@@ -56564,9 +59340,15 @@ v_cmp_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x61,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x61,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x61,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x61,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x61,0xd0,0x04,0x04,0x02,0x00]
@@ -56666,9 +59448,15 @@ v_cmp_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x62,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x62,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x62,0xd0,0x01,0x09,0x00,0x00]
@@ -56678,9 +59466,15 @@ v_cmp_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x62,0xd0,0x04,0x04,0x02,0x00]
@@ -56780,9 +59574,15 @@ v_cmp_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x63,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x63,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x63,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x63,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x63,0xd0,0x01,0x09,0x00,0x00]
@@ -56792,9 +59592,15 @@ v_cmp_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x63,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x63,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x63,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x63,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x63,0xd0,0x04,0x04,0x02,0x00]
@@ -56894,9 +59700,15 @@ v_cmp_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x64,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x64,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x64,0xd0,0x01,0x09,0x00,0x00]
@@ -56906,9 +59718,15 @@ v_cmp_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x64,0xd0,0x04,0x04,0x02,0x00]
@@ -57008,9 +59826,15 @@ v_cmp_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x65,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x65,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x65,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x65,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x65,0xd0,0x01,0x09,0x00,0x00]
@@ -57020,9 +59844,15 @@ v_cmp_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x65,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x65,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x65,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x65,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x65,0xd0,0x04,0x04,0x02,0x00]
@@ -57122,9 +59952,15 @@ v_cmp_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x66,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x66,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x66,0xd0,0x01,0x09,0x00,0x00]
@@ -57134,9 +59970,15 @@ v_cmp_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x66,0xd0,0x04,0x04,0x02,0x00]
@@ -57236,9 +60078,15 @@ v_cmp_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x67,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x67,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x67,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x67,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x67,0xd0,0x01,0x09,0x00,0x00]
@@ -57248,9 +60096,15 @@ v_cmp_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x67,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x67,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x67,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x67,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x67,0xd0,0x04,0x04,0x02,0x00]
@@ -57350,9 +60204,15 @@ v_cmp_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x68,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x68,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x68,0xd0,0x01,0x09,0x00,0x00]
@@ -57362,9 +60222,15 @@ v_cmp_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x68,0xd0,0x04,0x04,0x02,0x00]
@@ -57464,9 +60330,15 @@ v_cmp_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x69,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x69,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x69,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x69,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x69,0xd0,0x01,0x09,0x00,0x00]
@@ -57476,9 +60348,15 @@ v_cmp_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x69,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x69,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x69,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x69,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x69,0xd0,0x04,0x04,0x02,0x00]
@@ -57578,9 +60456,15 @@ v_cmp_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x01,0x09,0x00,0x00]
@@ -57590,9 +60474,15 @@ v_cmp_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6a,0xd0,0x04,0x04,0x02,0x00]
@@ -57692,9 +60582,15 @@ v_cmp_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6b,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6b,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6b,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6b,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6b,0xd0,0x01,0x09,0x00,0x00]
@@ -57704,9 +60600,15 @@ v_cmp_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6b,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6b,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6b,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6b,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6b,0xd0,0x04,0x04,0x02,0x00]
@@ -57806,9 +60708,15 @@ v_cmp_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x01,0x09,0x00,0x00]
@@ -57818,9 +60726,15 @@ v_cmp_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6c,0xd0,0x04,0x04,0x02,0x00]
@@ -57920,9 +60834,15 @@ v_cmp_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6d,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6d,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6d,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6d,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6d,0xd0,0x01,0x09,0x00,0x00]
@@ -57932,9 +60852,15 @@ v_cmp_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6d,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6d,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6d,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6d,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6d,0xd0,0x04,0x04,0x02,0x00]
@@ -58034,9 +60960,15 @@ v_cmp_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x01,0x09,0x00,0x00]
@@ -58046,9 +60978,15 @@ v_cmp_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6e,0xd0,0x04,0x04,0x02,0x00]
@@ -58148,9 +61086,15 @@ v_cmp_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmp_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x6f,0xd0,0x80,0x08,0x00,0x00]
+v_cmp_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x6f,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmp_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x6f,0xd0,0xf0,0x08,0x00,0x00]
+v_cmp_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x6f,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmp_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x6f,0xd0,0x01,0x09,0x00,0x00]
@@ -58160,9 +61104,15 @@ v_cmp_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmp_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x6f,0xd0,0x04,0x00,0x01,0x00]
+v_cmp_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x6f,0xd0,0x04,0x82,0x01,0x00]
+
v_cmp_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x6f,0xd0,0x04,0xe0,0x01,0x00]
+v_cmp_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x6f,0xd0,0x04,0xee,0x01,0x00]
+
v_cmp_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x6f,0xd0,0x04,0x04,0x02,0x00]
@@ -58262,9 +61212,15 @@ v_cmpx_f_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_f_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_f_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x70,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_f_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_f_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x70,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_f_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x70,0xd0,0x01,0x09,0x00,0x00]
@@ -58274,9 +61230,15 @@ v_cmpx_f_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_f_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_f_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_f_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_f_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_f_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x70,0xd0,0x04,0x04,0x02,0x00]
@@ -58376,9 +61338,15 @@ v_cmpx_lt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_lt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x71,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_lt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x71,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x71,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_lt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x71,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x71,0xd0,0x01,0x09,0x00,0x00]
@@ -58388,9 +61356,15 @@ v_cmpx_lt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_lt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x71,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_lt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x71,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x71,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_lt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x71,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_lt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x71,0xd0,0x04,0x04,0x02,0x00]
@@ -58490,9 +61464,15 @@ v_cmpx_eq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_eq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_eq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x72,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_eq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x72,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x72,0xd0,0x01,0x09,0x00,0x00]
@@ -58502,9 +61482,15 @@ v_cmpx_eq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_eq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_eq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_eq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_eq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x72,0xd0,0x04,0x04,0x02,0x00]
@@ -58604,9 +61590,15 @@ v_cmpx_le_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_le_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x73,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_le_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x73,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_le_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x73,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_le_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x73,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_le_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x73,0xd0,0x01,0x09,0x00,0x00]
@@ -58616,9 +61608,15 @@ v_cmpx_le_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_le_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x73,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_le_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x73,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_le_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x73,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_le_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x73,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_le_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x73,0xd0,0x04,0x04,0x02,0x00]
@@ -58718,9 +61716,15 @@ v_cmpx_gt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_gt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_gt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x74,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_gt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x74,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x74,0xd0,0x01,0x09,0x00,0x00]
@@ -58730,9 +61734,15 @@ v_cmpx_gt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_gt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_gt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_gt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_gt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x74,0xd0,0x04,0x04,0x02,0x00]
@@ -58832,9 +61842,15 @@ v_cmpx_lg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_lg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x75,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_lg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x75,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x75,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_lg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x75,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x75,0xd0,0x01,0x09,0x00,0x00]
@@ -58844,9 +61860,15 @@ v_cmpx_lg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_lg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x75,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_lg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x75,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x75,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_lg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x75,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_lg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x75,0xd0,0x04,0x04,0x02,0x00]
@@ -58946,9 +61968,15 @@ v_cmpx_ge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_ge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_ge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x76,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_ge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x76,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x76,0xd0,0x01,0x09,0x00,0x00]
@@ -58958,9 +61986,15 @@ v_cmpx_ge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_ge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_ge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_ge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_ge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x76,0xd0,0x04,0x04,0x02,0x00]
@@ -59060,9 +62094,15 @@ v_cmpx_o_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_o_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x77,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_o_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x77,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_o_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x77,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_o_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x77,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_o_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x77,0xd0,0x01,0x09,0x00,0x00]
@@ -59072,9 +62112,15 @@ v_cmpx_o_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_o_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x77,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_o_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x77,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_o_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x77,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_o_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x77,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_o_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x77,0xd0,0x04,0x04,0x02,0x00]
@@ -59174,9 +62220,15 @@ v_cmpx_u_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_u_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_u_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x78,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_u_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_u_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x78,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_u_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x78,0xd0,0x01,0x09,0x00,0x00]
@@ -59186,9 +62238,15 @@ v_cmpx_u_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_u_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_u_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_u_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_u_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_u_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x78,0xd0,0x04,0x04,0x02,0x00]
@@ -59288,9 +62346,15 @@ v_cmpx_nge_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nge_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x79,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nge_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x79,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x79,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nge_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x79,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x79,0xd0,0x01,0x09,0x00,0x00]
@@ -59300,9 +62364,15 @@ v_cmpx_nge_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nge_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x79,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nge_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x79,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x79,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nge_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x79,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nge_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x79,0xd0,0x04,0x04,0x02,0x00]
@@ -59402,9 +62472,15 @@ v_cmpx_nlg_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nlg_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7a,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7a,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x01,0x09,0x00,0x00]
@@ -59414,9 +62490,15 @@ v_cmpx_nlg_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nlg_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nlg_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7a,0xd0,0x04,0x04,0x02,0x00]
@@ -59516,9 +62598,15 @@ v_cmpx_ngt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_ngt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7b,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7b,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7b,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7b,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7b,0xd0,0x01,0x09,0x00,0x00]
@@ -59528,9 +62616,15 @@ v_cmpx_ngt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7b,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7b,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7b,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_ngt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7b,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_ngt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7b,0xd0,0x04,0x04,0x02,0x00]
@@ -59630,9 +62724,15 @@ v_cmpx_nle_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nle_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nle_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7c,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nle_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7c,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x01,0x09,0x00,0x00]
@@ -59642,9 +62742,15 @@ v_cmpx_nle_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nle_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nle_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nle_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nle_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7c,0xd0,0x04,0x04,0x02,0x00]
@@ -59744,9 +62850,15 @@ v_cmpx_neq_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_neq_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7d,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_neq_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7d,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7d,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_neq_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7d,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7d,0xd0,0x01,0x09,0x00,0x00]
@@ -59756,9 +62868,15 @@ v_cmpx_neq_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_neq_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7d,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_neq_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7d,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7d,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_neq_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7d,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_neq_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7d,0xd0,0x04,0x04,0x02,0x00]
@@ -59858,9 +62976,15 @@ v_cmpx_nlt_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_nlt_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7e,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7e,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x01,0x09,0x00,0x00]
@@ -59870,9 +62994,15 @@ v_cmpx_nlt_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_nlt_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_nlt_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7e,0xd0,0x04,0x04,0x02,0x00]
@@ -59972,9 +63102,15 @@ v_cmpx_tru_f64_e64 ttmp[10:11], s[4:5], s[4:5]
v_cmpx_tru_f64_e64 s[10:11], 0, s[4:5]
// CHECK: [0x0a,0x00,0x7f,0xd0,0x80,0x08,0x00,0x00]
+v_cmpx_tru_f64_e64 s[10:11], -1, s[4:5]
+// CHECK: [0x0a,0x00,0x7f,0xd0,0xc1,0x08,0x00,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], 0.5, s[4:5]
// CHECK: [0x0a,0x00,0x7f,0xd0,0xf0,0x08,0x00,0x00]
+v_cmpx_tru_f64_e64 s[10:11], -4.0, s[4:5]
+// CHECK: [0x0a,0x00,0x7f,0xd0,0xf7,0x08,0x00,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], v[1:2], s[4:5]
// CHECK: [0x0a,0x00,0x7f,0xd0,0x01,0x09,0x00,0x00]
@@ -59984,9 +63120,15 @@ v_cmpx_tru_f64_e64 s[10:11], v[254:255], s[4:5]
v_cmpx_tru_f64_e64 s[10:11], s[4:5], 0
// CHECK: [0x0a,0x00,0x7f,0xd0,0x04,0x00,0x01,0x00]
+v_cmpx_tru_f64_e64 s[10:11], s[4:5], -1
+// CHECK: [0x0a,0x00,0x7f,0xd0,0x04,0x82,0x01,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], s[4:5], 0.5
// CHECK: [0x0a,0x00,0x7f,0xd0,0x04,0xe0,0x01,0x00]
+v_cmpx_tru_f64_e64 s[10:11], s[4:5], -4.0
+// CHECK: [0x0a,0x00,0x7f,0xd0,0x04,0xee,0x01,0x00]
+
v_cmpx_tru_f64_e64 s[10:11], s[4:5], v[2:3]
// CHECK: [0x0a,0x00,0x7f,0xd0,0x04,0x04,0x02,0x00]
@@ -98831,17 +101973,3 @@ v_cmpx_t_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
v_cmpx_t_u32_sdwa vcc, v1, sext(v2) src0_sel:DWORD src1_sel:DWORD
// CHECK: [0xf9,0x04,0xbe,0x7d,0x01,0x16,0x06,0x0e]
-s_rfe_restore_b64 s[4:5], s2
-// CHECK: [0x04,0x02,0x80,0x95]
-
-v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00]
-
-v_mov_fed_b32_e64 v5, s1
-// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00]
-
-v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
-// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x06]
-
-v_perm_b32 v5, s1, 0, v255
-// CHECK: [0x05,0x00,0xed,0xd1,0x01,0x00,0xfd,0x07]
diff --git a/test/MC/ARM/assembly-default-build-attributes.s b/test/MC/ARM/assembly-default-build-attributes.s
new file mode 100644
index 0000000000000..e136361af0e54
--- /dev/null
+++ b/test/MC/ARM/assembly-default-build-attributes.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple armv7a < %s -arm-add-build-attributes | FileCheck %s --check-prefix=v7A
+// RUN: llvm-mc -triple armv6m < %s -arm-add-build-attributes | FileCheck %s --check-prefix=v6M
+// RUN: llvm-mc -triple armv7m < %s -arm-add-build-attributes | FileCheck %s --check-prefix=v7M
+// RUN: llvm-mc -triple armv7a -mcpu=cortex-a15 < %s -arm-add-build-attributes | FileCheck %s --check-prefix=Cortex-A15
+
+// This isn't intended to be a thorough check of the build attributes emitted
+// for each target (that's tested elsewhere), but just to check that the
+// hardware attributes are emitted by the assembler based on the selected
+// target when requested.
+
+// v7A-NOT: .cpu
+// v7A: .eabi_attribute 6, 10 @ Tag_CPU_arch
+// v7A: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile
+// v7A: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use
+// v7A: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use
+// v7A: .fpu neon
+// v7A: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
+
+// v6M-NOT: .cpu
+// v6M: .eabi_attribute 6, 12 @ Tag_CPU_arch
+// v6M: .eabi_attribute 7, 77 @ Tag_CPU_arch_profile
+// v6M: .eabi_attribute 8, 0 @ Tag_ARM_ISA_use
+// v6M: .eabi_attribute 9, 1 @ Tag_THUMB_ISA_use
+// v6M: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
+
+// v7M-NOT: .cpu
+// v7M: .eabi_attribute 6, 10 @ Tag_CPU_arch
+// v7M: .eabi_attribute 7, 77 @ Tag_CPU_arch_profile
+// v7M: .eabi_attribute 8, 0 @ Tag_ARM_ISA_use
+// v7M: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use
+// v7M: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
+
+// Cortex-A15: .cpu cortex-a15
+// Cortex-A15: .eabi_attribute 6, 10 @ Tag_CPU_arch
+// Cortex-A15: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile
+// Cortex-A15: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use
+// Cortex-A15: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use
+// Cortex-A15: .fpu neon-vfpv4
+// Cortex-A15: .eabi_attribute 36, 1 @ Tag_FP_HP_extension
+// Cortex-A15: .eabi_attribute 42, 1 @ Tag_MPextension_use
+// Cortex-A15: .eabi_attribute 44, 2 @ Tag_DIV_use
+// Cortex-A15: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
+// Cortex-A15: .eabi_attribute 68, 3 @ Tag_Virtualization_use
diff --git a/test/MC/ARM/multi-section-mapping.s b/test/MC/ARM/multi-section-mapping.s
index e4b7146e4b0f7..7e62b10f5b096 100644
--- a/test/MC/ARM/multi-section-mapping.s
+++ b/test/MC/ARM/multi-section-mapping.s
@@ -21,14 +21,31 @@
.arm
add r0, r0, r0
+@ Similarly, no $t if we change back to .starts_thumb using .pushsection
+ .pushsection .starts_thumb
+ .thumb
+ adds r0, r0, r0
+
+@ When we change back to .text using .popsection, .thumb is still active, so we
+@ should emit a $t
+ .popsection
+ add r0, r0, r0
+
+@ .ident does a push then pop of the .comment section, so the .word should
+@ cause $d to appear in the .text section
+ .ident "ident"
+ .word 0
+
@ With all those constraints, we want:
-@ + .text to have $a at 0 and no others
+@ + .text to have $a at 0, $t at 8, $d at 12
@ + .wibble to have $a at 0
@ + .starts_thumb to have $t at 0
@ + .starts_data to have $d at 0
@ CHECK: 00000000 .text 00000000 $a
@ CHECK-NEXT: 00000000 .wibble 00000000 $a
+@ CHECK-NEXT: 0000000a .text 00000000 $d
@ CHECK-NEXT: 00000000 .starts_thumb 00000000 $t
+@ CHECK-NEXT: 00000008 .text 00000000 $t
@ CHECK-NOT: ${{[adt]}}
diff --git a/test/TableGen/intrinsic-long-name.td b/test/TableGen/intrinsic-long-name.td
index d7c9d31762668..24ed89ac4acf0 100644
--- a/test/TableGen/intrinsic-long-name.td
+++ b/test/TableGen/intrinsic-long-name.td
@@ -22,7 +22,7 @@ class Intrinsic<string name, list<LLVMType> param_types = []> {
list<IntrinsicProperty> IntrProperties = [];
}
-def iAny : ValueType<0, 125>;
+def iAny : ValueType<0, 253>;
def llvm_anyint_ty : LLVMType<iAny>;
// Make sure we generate the long name without crashing
diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td
index 0aafad8093cc2..1e2378550855d 100644
--- a/test/TableGen/intrinsic-varargs.td
+++ b/test/TableGen/intrinsic-varargs.td
@@ -23,7 +23,7 @@ class Intrinsic<string name, list<LLVMType> param_types = []> {
}
// isVoid needs to match the definition in ValueTypes.td
-def isVoid : ValueType<0, 66>; // Produces no value
+def isVoid : ValueType<0, 108>; // Produces no value
def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
// CHECK: /* 0 */ 0, 29, 0,
diff --git a/test/ThinLTO/X86/autoupgrade.ll b/test/ThinLTO/X86/autoupgrade.ll
index 15c74f540b8c8..cbbe833d262ab 100644
--- a/test/ThinLTO/X86/autoupgrade.ll
+++ b/test/ThinLTO/X86/autoupgrade.ll
@@ -9,10 +9,8 @@
; RUN: -import=globalfunc1:%p/Inputs/autoupgrade.bc %t.bc \
; RUN: | llvm-bcanalyzer -dump | FileCheck %s
-
-; CHECK-NOT: 'llvm.invariant.start'
-; CHECK: record string = 'llvm.invariant.start.p0i8'
-; CHECK-NOT: 'llvm.invariant.start'
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'mainglobalfunc1llvm.invariant.start.p0i8'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
diff --git a/test/ThinLTO/X86/distributed_indexes.ll b/test/ThinLTO/X86/distributed_indexes.ll
index 0700488b5e92a..b81c94c2df8f2 100644
--- a/test/ThinLTO/X86/distributed_indexes.ll
+++ b/test/ThinLTO/X86/distributed_indexes.ll
@@ -13,15 +13,11 @@
; BACKEND1-NEXT: </MODULE_STRTAB_BLOCK
; BACKEND1-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; BACKEND1-NEXT: <VERSION
+; BACKEND1-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
+; BACKEND1-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
; BACKEND1-NEXT: <COMBINED
; BACKEND1-NEXT: <COMBINED
; BACKEND1-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; BACKEND1-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; BACKEND1-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; BACKEND1-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; BACKEND1-NEXT: </VALUE_SYMTAB
; The backend index for Input/distributed_indexes.ll contains summaries from
; itself only, as it does not import anything.
@@ -30,13 +26,9 @@
; BACKEND2-NEXT: </MODULE_STRTAB_BLOCK
; BACKEND2-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; BACKEND2-NEXT: <VERSION
+; BACKEND2-NEXT: <VALUE_GUID op0=1 op1=-5300342847281564238
; BACKEND2-NEXT: <COMBINED
; BACKEND2-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; BACKEND2-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; BACKEND2-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0=1 op1=-5300342847281564238
-; BACKEND2-NEXT: </VALUE_SYMTAB
declare void @g(...)
diff --git a/test/Transforms/CodeGenPrepare/split-indirect-loop.ll b/test/Transforms/CodeGenPrepare/split-indirect-loop.ll
new file mode 100644
index 0000000000000..cb834bb5dd8f9
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/split-indirect-loop.ll
@@ -0,0 +1,37 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; Test that an invalid CFG is not created by the splitIndirectCriticalEdges
+; transformation when the 'target' block loops back to itself.
+
+; CHECK: .split:
+; CHECK: br label %while.body.clone
+; CHECK: if.else1:
+; CHECK: indirectbr
+; CHECK: while.body.clone:
+; CHECK: br label %.split
+
+define void @test() {
+entry:
+ br label %if.else
+
+if.else:
+ br i1 undef, label %while.body, label %preheader
+
+preheader:
+ br label %if.else1
+
+if.then:
+ unreachable
+
+while.body:
+ %dest.sroa = phi i32 [ %1, %while.body ], [ undef, %if.else1 ], [ undef, %if.else ]
+ %0 = inttoptr i32 %dest.sroa to i8*
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 -1
+ %1 = ptrtoint i8* %incdec.ptr to i32
+ store i8 undef, i8* %incdec.ptr, align 1
+ br label %while.body
+
+if.else1:
+ indirectbr i8* undef, [label %if.then, label %while.body, label %if.else, label %if.else1]
+}
+
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
new file mode 100644
index 0000000000000..9ae4132231d83
--- /dev/null
+++ b/test/Transforms/GVN/non-integral-pointers.ll
@@ -0,0 +1,39 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
+; CHECK-LABEL: @f0(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+ entry:
+ store i64 %val, i64* %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i64* %loc to i8 addrspace(4)**
+ %ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
+ store i8 5, i8 addrspace(4)* %ptr
+ ret void
+
+ alwaysTaken:
+ ret void
+}
+
+define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+; CHECK-LABEL: @f1(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+ entry:
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i64*
+ %int = load i64, i64* %loc.bc
+ ret i64 %int
+
+ alwaysTaken:
+ ret i64 42
+}
diff --git a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
deleted file mode 100644
index 510a68c3437e8..0000000000000
--- a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep bitcast | count 2
-
-define signext i32 @b(i32* inreg %x) {
- ret i32 0
-}
-
-define void @c(...) {
- ret void
-}
-
-define void @g(i32* %y) {
- call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)( i32 zeroext 0 ) ; <i32>:2 [#uses=0]
- call void bitcast (void (...)* @c to void (i32*)*)( i32* sret null )
- ret void
-}
diff --git a/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll
index 888f51bf939dd..0c4842c159880 100644
--- a/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll
+++ b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll
@@ -227,6 +227,12 @@ define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i3
ret float %elt0
}
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+
; --------------------------------------------------------------------
; llvm.amdgcn.buffer.load.format
; --------------------------------------------------------------------
@@ -304,18 +310,1196 @@ define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
ret i16 %tmp2
}
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1
declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; FIXME: Should really fold to undef
+; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; FIXME: Should really fold to undef
+; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; FIXME: Should really fold to undef
+; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; FIXME: Should really fold to undef
+; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 9, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4
+; --------------------------------------------------------------------
+
+; Don't handle gather4*
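+; (gather4 uses the dmask to select the single component gathered from four
+; texels, rather than to mask which result elements are written, so the
+; demanded-elements fold is not applicable and these calls stay untouched.)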
+
+; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v8i32(
+; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.getlod
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v8i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v2f32_v4i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/InstCombine/call-cast-attrs.ll b/test/Transforms/InstCombine/call-cast-attrs.ll
new file mode 100644
index 0000000000000..ddaf90c3e74fd
--- /dev/null
+++ b/test/Transforms/InstCombine/call-cast-attrs.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define signext i32 @b(i32* inreg %x) {
+ ret i32 0
+}
+
+define void @c(...) {
+ ret void
+}
+
+declare void @useit(i32)
+
+define void @d(i32 %x, ...) {
+ call void @useit(i32 %x)
+ ret void
+}
+
+define void @g(i32* %y) {
+ call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+ call void bitcast (void (...)* @c to void (i32*)*)(i32* %y)
+ call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+ call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)
+ ret void
+}
+; CHECK-LABEL: define void @g(i32* %y)
+; CHECK: call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+; CHECK: call void (...) @c(i32* %y)
+; CHECK: call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+; CHECK: call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)
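+;
+; The CHECK lines above assert that only the plain varargs call to @c is
+; simplified into a direct call; the other three calls stay behind their
+; bitcasts, presumably because of the signext/zeroext mismatch on @b and the
+; sret arguments, which is the attribute behavior this test exercises.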
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
index 50cd6070896e8..27578387f827a 100644
--- a/test/Transforms/InstCombine/constant-fold-math.ll
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -45,4 +45,22 @@ define double @constant_fold_fmuladd_f64() #0 {
ret double %x
}
+; PR32177
+
+; CHECK-LABEL: @constant_fold_frem_f32
+; CHECK-NEXT: ret float 0x41A61B2000000000
+define float @constant_fold_frem_f32() #0 {
+ %x = frem float 0x43cbfcd960000000, 0xc1e2b34a00000000
+ ret float %x
+}
+
+; PR3316
+
+; CHECK-LABEL: @constant_fold_frem_f64
+; CHECK-NEXT: ret double 0.000000e+00
+define double @constant_fold_frem_f64() {
+ %x = frem double 0x43E0000000000000, 1.000000e+00
+ ret double %x
+}
+
attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index 517313ed8e4ed..b5a65048fda01 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -16,6 +16,21 @@ entry:
ret i32 %d
}
+define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) {
+; CHECK-LABEL: @t1vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[D:%.*]] = lshr <2 x i32> [[CONV]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+entry:
+ %conv = zext <2 x i16> %x to <2 x i32>
+ %s = shl <2 x i32> <i32 2, i32 2>, %y
+ %d = sdiv <2 x i32> %conv, %s
+ ret <2 x i32> %d
+}
+
; rdar://11721329
define i64 @t2(i64 %x, i32 %y) {
; CHECK-LABEL: @t2(
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index a037607267ac8..796fce020fd3d 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -225,6 +225,16 @@ define i32 @test19(i32 %x) {
ret i32 %A
}
+define <2 x i32> @test19vec(<2 x i32> %x) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %A = udiv <2 x i32> <i32 1, i32 1>, %x
+ ret <2 x i32> %A
+}
+
define i32 @test20(i32 %x) {
; CHECK-LABEL: @test20(
; CHECK-NEXT: [[TMP1:%.*]] = add i32 %x, 1
@@ -236,6 +246,17 @@ define i32 @test20(i32 %x) {
ret i32 %A
}
+define <2 x i32> @test20vec(<2 x i32> %x) {
+; CHECK-LABEL: @test20vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %A = sdiv <2 x i32> <i32 1, i32 1>, %x
+ ret <2 x i32> %A
+}
+
define i32 @test21(i32 %a) {
; CHECK-LABEL: @test21(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 %a, 3
@@ -388,6 +409,17 @@ define i32 @test35(i32 %A) {
ret i32 %mul
}
+define <2 x i32> @test35vec(<2 x i32> %A) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %mul = sdiv exact <2 x i32> %and, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %mul
+}
+
define i32 @test36(i32 %A) {
; CHECK-LABEL: @test36(
; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 2147483647
@@ -400,13 +432,10 @@ define i32 @test36(i32 %A) {
ret i32 %mul
}
-; FIXME: Vector should get same transform as scalar.
-
define <2 x i32> @test36vec(<2 x i32> %A) {
; CHECK-LABEL: @test36vec(
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw <2 x i32> <i32 1, i32 1>, %A
-; CHECK-NEXT: [[MUL:%.*]] = sdiv exact <2 x i32> [[AND]], [[SHL]]
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]]
; CHECK-NEXT: ret <2 x i32> [[MUL]]
;
%and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
diff --git a/test/Transforms/InstCombine/pr32686.ll b/test/Transforms/InstCombine/pr32686.ll
new file mode 100644
index 0000000000000..b2d2aff2fde8a
--- /dev/null
+++ b/test/Transforms/InstCombine/pr32686.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+@a = external global i8
+@b = external global i32
+
+define void @tinkywinky() {
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT: [[PATATINO:%.*]] = load i8, i8* @a, align 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[PATATINO]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i32
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[TMP1]], or (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2)
+; CHECK-NEXT: store i32 [[OR1]], i32* @b, align 4
+; CHECK-NEXT: ret void
+;
+ %patatino = load i8, i8* @a
+ %tobool = icmp ne i8 %patatino, 0
+ %lnot = xor i1 %tobool, true
+ %lnot.ext = zext i1 %lnot to i32
+ %or = or i32 xor (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2), %lnot.ext
+ store i32 %or, i32* @b, align 4
+ ret void
+}
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 7a7a134db9c5d..86a3580189fd2 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
define i64 @rem_signed(i64 %x1, i64 %y2) {
@@ -571,3 +572,24 @@ rem.is.unsafe:
ret i32 0
}
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = urem i32 [[AND]], 2147483647
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %and = and i32 %A, 2147483647
+ %mul = srem i32 %and, 2147483647
+ ret i32 %mul
+}
+
+define <2 x i32> @test23(<2 x i32> %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %mul
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 60ba35557f70a..d5f489280a034 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -1268,3 +1268,23 @@ define <2 x i64> @test_64_splat_vec(<2 x i32> %t) {
ret <2 x i64> %shl
}
+define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) {
+; CHECK-LABEL: @ashr_demanded_bits_splat(
+; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i8> [[SHR]]
+;
+ %and = and <2 x i8> %x, <i8 128, i8 128>
+ %shr = ashr <2 x i8> %and, <i8 7, i8 7>
+ ret <2 x i8> %shr
+}
+
+define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_demanded_bits_splat(
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i8> [[SHR]]
+;
+ %and = and <2 x i8> %x, <i8 128, i8 128>
+ %shr = lshr <2 x i8> %and, <i8 7, i8 7>
+ ret <2 x i8> %shr
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 643ab6c5348fa..2197c250ace2c 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -15,9 +15,9 @@ define <2 x i1> @test1(<2 x i64> %a) {
; The ashr turns into an lshr.
define <2 x i64> @test2(<2 x i64> %a) {
; CHECK-LABEL: @test2(
-; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, <i64 65535, i64 65535>
-; CHECK-NEXT: [[T:%.*]] = lshr <2 x i64> [[B]], <i64 1, i64 1>
-; CHECK-NEXT: ret <2 x i64> [[T]]
+; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, <i64 65534, i64 65534>
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i64> [[B]], <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%b = and <2 x i64> %a, <i64 65535, i64 65535>
%t = ashr <2 x i64> %b, <i64 1, i64 1>
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index 33fd978277d4c..aa71c6ba86ae6 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -376,26 +376,6 @@ define i1 @or_icmp3(i32 %x, i32 %y) {
ret i1 %3
}
-define i1 @disjoint_cmps(i32 %A) {
-; CHECK-LABEL: @disjoint_cmps(
-; CHECK-NEXT: ret i1 false
-;
- %B = icmp eq i32 %A, 1
- %C = icmp sge i32 %A, 3
- %D = and i1 %B, %C
- ret i1 %D
-}
-
-define i1 @disjoint_cmps2(i32 %X) {
-; CHECK-LABEL: @disjoint_cmps2(
-; CHECK-NEXT: ret i1 false
-;
- %a = icmp ult i32 %X, 31
- %b = icmp slt i32 %X, 0
- %c = and i1 %a, %b
- ret i1 %c
-}
-
; PR27869 - Look through casts to eliminate cmps and bitwise logic.
define i32 @and_of_zexted_icmps(i32 %i) {
diff --git a/test/Transforms/InstSimplify/icmp-ranges.ll b/test/Transforms/InstSimplify/icmp-ranges.ll
new file mode 100644
index 0000000000000..dcbbe0bc7fb9c
--- /dev/null
+++ b/test/Transforms/InstSimplify/icmp-ranges.ll
@@ -0,0 +1,2912 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; Cycle through all pairs of predicates to test
+; simplification of range-intersection or range-union.
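+; Each test is named and_<pred1>_<pred2> and computes (x <pred1> 13) && (x <pred2> 17).
+; When the two constant ranges cannot intersect (for example, x == 13 && x == 17),
+; the 'and' is expected to fold to false; in the remaining cases both compares are
+; currently left untouched.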
+
+; eq
+; x == 13 && x == 17
+
+define i1 @and_eq_eq(i8 %x) {
+; CHECK-LABEL: @and_eq_eq(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x != 17
+
+define i1 @and_eq_ne(i8 %x) {
+; CHECK-LABEL: @and_eq_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x >=s 17
+
+define i1 @and_eq_sge(i8 %x) {
+; CHECK-LABEL: @and_eq_sge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x >s 17
+
+define i1 @and_eq_sgt(i8 %x) {
+; CHECK-LABEL: @and_eq_sgt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x <=s 17
+
+define i1 @and_eq_sle(i8 %x) {
+; CHECK-LABEL: @and_eq_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x <s 17
+
+define i1 @and_eq_slt(i8 %x) {
+; CHECK-LABEL: @and_eq_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x >=u 17
+
+define i1 @and_eq_uge(i8 %x) {
+; CHECK-LABEL: @and_eq_uge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x >u 17
+
+define i1 @and_eq_ugt(i8 %x) {
+; CHECK-LABEL: @and_eq_ugt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x <=u 17
+
+define i1 @and_eq_ule(i8 %x) {
+; CHECK-LABEL: @and_eq_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 13 && x <u 17
+
+define i1 @and_eq_ult(i8 %x) {
+; CHECK-LABEL: @and_eq_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ne
+; x != 13 && x == 17
+
+define i1 @and_ne_eq(i8 %x) {
+; CHECK-LABEL: @and_ne_eq(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x != 17
+
+define i1 @and_ne_ne(i8 %x) {
+; CHECK-LABEL: @and_ne_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x >=s 17
+
+define i1 @and_ne_sge(i8 %x) {
+; CHECK-LABEL: @and_ne_sge(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x >s 17
+
+define i1 @and_ne_sgt(i8 %x) {
+; CHECK-LABEL: @and_ne_sgt(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x <=s 17
+
+define i1 @and_ne_sle(i8 %x) {
+; CHECK-LABEL: @and_ne_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x <s 17
+
+define i1 @and_ne_slt(i8 %x) {
+; CHECK-LABEL: @and_ne_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x >=u 17
+
+define i1 @and_ne_uge(i8 %x) {
+; CHECK-LABEL: @and_ne_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x >u 17
+
+define i1 @and_ne_ugt(i8 %x) {
+; CHECK-LABEL: @and_ne_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x <=u 17
+
+define i1 @and_ne_ule(i8 %x) {
+; CHECK-LABEL: @and_ne_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 13 && x <u 17
+
+define i1 @and_ne_ult(i8 %x) {
+; CHECK-LABEL: @and_ne_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sge
+; x >=s 13 && x == 17
+
+define i1 @and_sge_eq(i8 %x) {
+; CHECK-LABEL: @and_sge_eq(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x != 17
+
+define i1 @and_sge_ne(i8 %x) {
+; CHECK-LABEL: @and_sge_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x >=s 17
+
+define i1 @and_sge_sge(i8 %x) {
+; CHECK-LABEL: @and_sge_sge(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x >s 17
+
+define i1 @and_sge_sgt(i8 %x) {
+; CHECK-LABEL: @and_sge_sgt(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x <=s 17
+
+define i1 @and_sge_sle(i8 %x) {
+; CHECK-LABEL: @and_sge_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x <s 17
+
+define i1 @and_sge_slt(i8 %x) {
+; CHECK-LABEL: @and_sge_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x >=u 17
+
+define i1 @and_sge_uge(i8 %x) {
+; CHECK-LABEL: @and_sge_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x >u 17
+
+define i1 @and_sge_ugt(i8 %x) {
+; CHECK-LABEL: @and_sge_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x <=u 17
+
+define i1 @and_sge_ule(i8 %x) {
+; CHECK-LABEL: @and_sge_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 13 && x <u 17
+
+define i1 @and_sge_ult(i8 %x) {
+; CHECK-LABEL: @and_sge_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sgt
+; x >s 13 && x == 17
+
+define i1 @and_sgt_eq(i8 %x) {
+; CHECK-LABEL: @and_sgt_eq(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x != 17
+
+define i1 @and_sgt_ne(i8 %x) {
+; CHECK-LABEL: @and_sgt_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x >=s 17
+
+define i1 @and_sgt_sge(i8 %x) {
+; CHECK-LABEL: @and_sgt_sge(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x >s 17
+
+define i1 @and_sgt_sgt(i8 %x) {
+; CHECK-LABEL: @and_sgt_sgt(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x <=s 17
+
+define i1 @and_sgt_sle(i8 %x) {
+; CHECK-LABEL: @and_sgt_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x <s 17
+
+define i1 @and_sgt_slt(i8 %x) {
+; CHECK-LABEL: @and_sgt_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x >=u 17
+
+define i1 @and_sgt_uge(i8 %x) {
+; CHECK-LABEL: @and_sgt_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x >u 17
+
+define i1 @and_sgt_ugt(i8 %x) {
+; CHECK-LABEL: @and_sgt_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x <=u 17
+
+define i1 @and_sgt_ule(i8 %x) {
+; CHECK-LABEL: @and_sgt_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 13 && x <u 17
+
+define i1 @and_sgt_ult(i8 %x) {
+; CHECK-LABEL: @and_sgt_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sle
+; x <=s 13 && x == 17
+
+define i1 @and_sle_eq(i8 %x) {
+; CHECK-LABEL: @and_sle_eq(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x != 17
+
+define i1 @and_sle_ne(i8 %x) {
+; CHECK-LABEL: @and_sle_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x >=s 17
+
+define i1 @and_sle_sge(i8 %x) {
+; CHECK-LABEL: @and_sle_sge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x >s 17
+
+define i1 @and_sle_sgt(i8 %x) {
+; CHECK-LABEL: @and_sle_sgt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x <=s 17
+
+define i1 @and_sle_sle(i8 %x) {
+; CHECK-LABEL: @and_sle_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x <s 17
+
+define i1 @and_sle_slt(i8 %x) {
+; CHECK-LABEL: @and_sle_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x >=u 17
+
+define i1 @and_sle_uge(i8 %x) {
+; CHECK-LABEL: @and_sle_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x >u 17
+
+define i1 @and_sle_ugt(i8 %x) {
+; CHECK-LABEL: @and_sle_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x <=u 17
+
+define i1 @and_sle_ule(i8 %x) {
+; CHECK-LABEL: @and_sle_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 13 && x <u 17
+
+define i1 @and_sle_ult(i8 %x) {
+; CHECK-LABEL: @and_sle_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; slt
+; x <s 13 && x == 17
+
+define i1 @and_slt_eq(i8 %x) {
+; CHECK-LABEL: @and_slt_eq(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x != 17
+
+define i1 @and_slt_ne(i8 %x) {
+; CHECK-LABEL: @and_slt_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x >=s 17
+
+define i1 @and_slt_sge(i8 %x) {
+; CHECK-LABEL: @and_slt_sge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x >s 17
+
+define i1 @and_slt_sgt(i8 %x) {
+; CHECK-LABEL: @and_slt_sgt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x <=s 17
+
+define i1 @and_slt_sle(i8 %x) {
+; CHECK-LABEL: @and_slt_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x <s 17
+
+define i1 @and_slt_slt(i8 %x) {
+; CHECK-LABEL: @and_slt_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x >=u 17
+
+define i1 @and_slt_uge(i8 %x) {
+; CHECK-LABEL: @and_slt_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x >u 17
+
+define i1 @and_slt_ugt(i8 %x) {
+; CHECK-LABEL: @and_slt_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x <=u 17
+
+define i1 @and_slt_ule(i8 %x) {
+; CHECK-LABEL: @and_slt_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 13 && x <u 17
+
+define i1 @and_slt_ult(i8 %x) {
+; CHECK-LABEL: @and_slt_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; uge
+; x >=u 13 && x == 17
+
+define i1 @and_uge_eq(i8 %x) {
+; CHECK-LABEL: @and_uge_eq(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x != 17
+
+define i1 @and_uge_ne(i8 %x) {
+; CHECK-LABEL: @and_uge_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x >=s 17
+
+define i1 @and_uge_sge(i8 %x) {
+; CHECK-LABEL: @and_uge_sge(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x >s 17
+
+define i1 @and_uge_sgt(i8 %x) {
+; CHECK-LABEL: @and_uge_sgt(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x <=s 17
+
+define i1 @and_uge_sle(i8 %x) {
+; CHECK-LABEL: @and_uge_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x <s 17
+
+define i1 @and_uge_slt(i8 %x) {
+; CHECK-LABEL: @and_uge_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x >=u 17
+
+define i1 @and_uge_uge(i8 %x) {
+; CHECK-LABEL: @and_uge_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x >u 17
+
+define i1 @and_uge_ugt(i8 %x) {
+; CHECK-LABEL: @and_uge_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x <=u 17
+
+define i1 @and_uge_ule(i8 %x) {
+; CHECK-LABEL: @and_uge_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 13 && x <u 17
+
+define i1 @and_uge_ult(i8 %x) {
+; CHECK-LABEL: @and_uge_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ugt
+; x >u 13 && x == 17
+
+define i1 @and_ugt_eq(i8 %x) {
+; CHECK-LABEL: @and_ugt_eq(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x != 17
+
+define i1 @and_ugt_ne(i8 %x) {
+; CHECK-LABEL: @and_ugt_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x >=s 17
+
+define i1 @and_ugt_sge(i8 %x) {
+; CHECK-LABEL: @and_ugt_sge(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x >s 17
+
+define i1 @and_ugt_sgt(i8 %x) {
+; CHECK-LABEL: @and_ugt_sgt(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x <=s 17
+
+define i1 @and_ugt_sle(i8 %x) {
+; CHECK-LABEL: @and_ugt_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x <s 17
+
+define i1 @and_ugt_slt(i8 %x) {
+; CHECK-LABEL: @and_ugt_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x >=u 17
+
+define i1 @and_ugt_uge(i8 %x) {
+; CHECK-LABEL: @and_ugt_uge(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x >u 17
+
+define i1 @and_ugt_ugt(i8 %x) {
+; CHECK-LABEL: @and_ugt_ugt(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x <=u 17
+
+define i1 @and_ugt_ule(i8 %x) {
+; CHECK-LABEL: @and_ugt_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 13 && x <u 17
+
+define i1 @and_ugt_ult(i8 %x) {
+; CHECK-LABEL: @and_ugt_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ule
+; x <=u 13 && x == 17
+
+define i1 @and_ule_eq(i8 %x) {
+; CHECK-LABEL: @and_ule_eq(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x != 17
+
+define i1 @and_ule_ne(i8 %x) {
+; CHECK-LABEL: @and_ule_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x >=s 17
+
+define i1 @and_ule_sge(i8 %x) {
+; CHECK-LABEL: @and_ule_sge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x >s 17
+
+define i1 @and_ule_sgt(i8 %x) {
+; CHECK-LABEL: @and_ule_sgt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x <=s 17
+
+define i1 @and_ule_sle(i8 %x) {
+; CHECK-LABEL: @and_ule_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x <s 17
+
+define i1 @and_ule_slt(i8 %x) {
+; CHECK-LABEL: @and_ule_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x >=u 17
+
+define i1 @and_ule_uge(i8 %x) {
+; CHECK-LABEL: @and_ule_uge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x >u 17
+
+define i1 @and_ule_ugt(i8 %x) {
+; CHECK-LABEL: @and_ule_ugt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x <=u 17
+
+define i1 @and_ule_ule(i8 %x) {
+; CHECK-LABEL: @and_ule_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 13 && x <u 17
+
+define i1 @and_ule_ult(i8 %x) {
+; CHECK-LABEL: @and_ule_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ult
+; x <u 13 && x == 17
+
+define i1 @and_ult_eq(i8 %x) {
+; CHECK-LABEL: @and_ult_eq(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x != 17
+
+define i1 @and_ult_ne(i8 %x) {
+; CHECK-LABEL: @and_ult_ne(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x >=s 17
+
+define i1 @and_ult_sge(i8 %x) {
+; CHECK-LABEL: @and_ult_sge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x >s 17
+
+define i1 @and_ult_sgt(i8 %x) {
+; CHECK-LABEL: @and_ult_sgt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x <=s 17
+
+define i1 @and_ult_sle(i8 %x) {
+; CHECK-LABEL: @and_ult_sle(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x <s 17
+
+define i1 @and_ult_slt(i8 %x) {
+; CHECK-LABEL: @and_ult_slt(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x >=u 17
+
+define i1 @and_ult_uge(i8 %x) {
+; CHECK-LABEL: @and_ult_uge(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x >u 17
+
+define i1 @and_ult_ugt(i8 %x) {
+; CHECK-LABEL: @and_ult_ugt(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x <=u 17
+
+define i1 @and_ult_ule(i8 %x) {
+; CHECK-LABEL: @and_ult_ule(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 13 && x <u 17
+
+define i1 @and_ult_ult(i8 %x) {
+; CHECK-LABEL: @and_ult_ult(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 13
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 13
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; eq
+; x == 23 && x == 17
+
+define i1 @and_eq_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_eq_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x != 17
+
+define i1 @and_eq_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x >=s 17
+
+define i1 @and_eq_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x >s 17
+
+define i1 @and_eq_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x <=s 17
+
+define i1 @and_eq_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_sle_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x <s 17
+
+define i1 @and_eq_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_slt_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x >=u 17
+
+define i1 @and_eq_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x >u 17
+
+define i1 @and_eq_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x <=u 17
+
+define i1 @and_eq_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_ule_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x == 23 && x <u 17
+
+define i1 @and_eq_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_eq_ult_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp eq i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ne
+; x != 23 && x == 17
+
+define i1 @and_ne_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_eq_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x != 17
+
+define i1 @and_ne_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x >=s 17
+
+define i1 @and_ne_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x >s 17
+
+define i1 @and_ne_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x <=s 17
+
+define i1 @and_ne_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x <s 17
+
+define i1 @and_ne_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x >=u 17
+
+define i1 @and_ne_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x >u 17
+
+define i1 @and_ne_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x <=u 17
+
+define i1 @and_ne_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_ule_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x != 23 && x <u 17
+
+define i1 @and_ne_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_ne_ult_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ne i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sge
+; x >=s 23 && x == 17
+
+define i1 @and_sge_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_eq_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x != 17
+
+define i1 @and_sge_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x >=s 17
+
+define i1 @and_sge_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x >s 17
+
+define i1 @and_sge_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x <=s 17
+
+define i1 @and_sge_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_sle_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x <s 17
+
+define i1 @and_sge_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_slt_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x >=u 17
+
+define i1 @and_sge_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x >u 17
+
+define i1 @and_sge_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x <=u 17
+
+define i1 @and_sge_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_ule_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=s 23 && x <u 17
+
+define i1 @and_sge_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_sge_ult_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sge i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sgt
+; x >s 23 && x == 17
+
+define i1 @and_sgt_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_eq_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x != 17
+
+define i1 @and_sgt_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x >=s 17
+
+define i1 @and_sgt_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x >s 17
+
+define i1 @and_sgt_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x <=s 17
+
+define i1 @and_sgt_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_sle_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x <s 17
+
+define i1 @and_sgt_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_slt_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x >=u 17
+
+define i1 @and_sgt_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x >u 17
+
+define i1 @and_sgt_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x <=u 17
+
+define i1 @and_sgt_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_ule_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >s 23 && x <u 17
+
+define i1 @and_sgt_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_sgt_ult_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp sgt i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; sle
+; x <=s 23 && x == 17
+
+define i1 @and_sle_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_eq_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x != 17
+
+define i1 @and_sle_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x >=s 17
+
+define i1 @and_sle_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x >s 17
+
+define i1 @and_sle_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x <=s 17
+
+define i1 @and_sle_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x <s 17
+
+define i1 @and_sle_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x >=u 17
+
+define i1 @and_sle_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x >u 17
+
+define i1 @and_sle_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x <=u 17
+
+define i1 @and_sle_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_ule_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=s 23 && x <u 17
+
+define i1 @and_sle_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_sle_ult_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp sle i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp sle i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; slt
+; x <s 23 && x == 17
+
+define i1 @and_slt_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_eq_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x != 17
+
+define i1 @and_slt_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x >=s 17
+
+define i1 @and_slt_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x >s 17
+
+define i1 @and_slt_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x <=s 17
+
+define i1 @and_slt_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x <s 17
+
+define i1 @and_slt_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x >=u 17
+
+define i1 @and_slt_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x >u 17
+
+define i1 @and_slt_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x <=u 17
+
+define i1 @and_slt_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_ule_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <s 23 && x <u 17
+
+define i1 @and_slt_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_slt_ult_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp slt i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; uge
+; x >=u 23 && x == 17
+
+define i1 @and_uge_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_eq_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x != 17
+
+define i1 @and_uge_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x >=s 17
+
+define i1 @and_uge_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x >s 17
+
+define i1 @and_uge_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x <=s 17
+
+define i1 @and_uge_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x <s 17
+
+define i1 @and_uge_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x >=u 17
+
+define i1 @and_uge_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x >u 17
+
+define i1 @and_uge_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp uge i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x <=u 17
+
+define i1 @and_uge_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_ule_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >=u 23 && x <u 17
+
+define i1 @and_uge_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_uge_ult_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp uge i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ugt
+; x >u 23 && x == 17
+
+define i1 @and_ugt_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_eq_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x != 17
+
+define i1 @and_ugt_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x >=s 17
+
+define i1 @and_ugt_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x >s 17
+
+define i1 @and_ugt_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x <=s 17
+
+define i1 @and_ugt_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x <s 17
+
+define i1 @and_ugt_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x >=u 17
+
+define i1 @and_ugt_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x >u 17
+
+define i1 @and_ugt_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x <=u 17
+
+define i1 @and_ugt_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_ule_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x >u 23 && x <u 17
+
+define i1 @and_ugt_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_ugt_ult_swap(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ugt i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ule
+; x <=u 23 && x == 17
+
+define i1 @and_ule_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_eq_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x != 17
+
+define i1 @and_ule_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x >=s 17
+
+define i1 @and_ule_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x >s 17
+
+define i1 @and_ule_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x <=s 17
+
+define i1 @and_ule_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x <s 17
+
+define i1 @and_ule_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x >=u 17
+
+define i1 @and_ule_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x >u 17
+
+define i1 @and_ule_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x <=u 17
+
+define i1 @and_ule_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_ule_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <=u 23 && x <u 17
+
+define i1 @and_ule_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_ule_ult_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ule i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ule i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; ult
+; x <u 23 && x == 17
+
+define i1 @and_ult_eq_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_eq_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp eq i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x != 17
+
+define i1 @and_ult_ne_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_ne_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp ne i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x >=s 17
+
+define i1 @and_ult_sge_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_sge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp sge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x >s 17
+
+define i1 @and_ult_sgt_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_sgt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp sgt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x <=s 17
+
+define i1 @and_ult_sle_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_sle_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp sle i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp sle i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x <s 17
+
+define i1 @and_ult_slt_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_slt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp slt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x >=u 17
+
+define i1 @and_ult_uge_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_uge_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp uge i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp uge i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x >u 17
+
+define i1 @and_ult_ugt_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_ugt_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp ugt i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x <=u 17
+
+define i1 @and_ult_ule_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_ule_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ule i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp ule i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; x <u 23 && x <u 17
+
+define i1 @and_ult_ult_swap(i8 %x) {
+; CHECK-LABEL: @and_ult_ult_swap(
+; CHECK-NEXT: [[A:%.*]] = icmp ult i8 %x, 23
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 17
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = icmp ult i8 %x, 23
+ %b = icmp ult i8 %x, 17
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; Special case - 'slt 0' is really 'uge SignMask', so the two ranges are disjoint.
+; x <u 31 && x <s 0
+
+define i1 @empty2(i32 %x) {
+; CHECK-LABEL: @empty2(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i32 %x, 31
+ %b = icmp slt i32 %x, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
diff --git a/test/Transforms/InstSimplify/shufflevector.ll b/test/Transforms/InstSimplify/shufflevector.ll
index c6d180da293f8..e03916c5b90d6 100644
--- a/test/Transforms/InstSimplify/shufflevector.ll
+++ b/test/Transforms/InstSimplify/shufflevector.ll
@@ -120,8 +120,7 @@ define <4 x i32> @undef_mask(<4 x i32> %x) {
define <4 x i32> @identity_mask_0(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_0(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %shuf
@@ -129,8 +128,7 @@ define <4 x i32> @identity_mask_0(<4 x i32> %x) {
define <4 x i32> @identity_mask_1(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_1(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shuf
@@ -138,13 +136,32 @@ define <4 x i32> @identity_mask_1(<4 x i32> %x) {
define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
; CHECK-LABEL: @pseudo_identity_mask(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %shuf
}
+define <4 x i32> @not_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @not_identity_mask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x i32> %shuf
+}
+
+; TODO: Should we simplify if the mask has an undef element?
+
+define <4 x i32> @possible_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @possible_identity_mask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x i32> %shuf
+}
+
define <4 x i32> @const_operand(<4 x i32> %x) {
; CHECK-LABEL: @const_operand(
; CHECK-NEXT: ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
@@ -155,10 +172,7 @@ define <4 x i32> @const_operand(<4 x i32> %x) {
define <4 x i32> @merge(<4 x i32> %x) {
; CHECK-LABEL: @merge(
-; CHECK-NEXT: [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
-; CHECK-NEXT: ret <4 x i32> [[MERGED]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
%upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -166,16 +180,24 @@ define <4 x i32> @merge(<4 x i32> %x) {
ret <4 x i32> %merged
}
+; This crosses lanes from the source op.
+
+define <4 x i32> @not_merge(<4 x i32> %x) {
+; CHECK-LABEL: @not_merge(
+; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[MERGED]]
+;
+ %l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+ ret <4 x i32> %merged
+}
+
define <8 x double> @extract_and_concat(<8 x double> %x) {
; CHECK-LABEL: @extract_and_concat(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x double> [[S7]]
+; CHECK-NEXT: ret <8 x double> [[X:%.*]]
;
%s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -191,14 +213,7 @@ define <8 x double> @extract_and_concat(<8 x double> %x) {
define <8 x i64> @PR30630(<8 x i64> %x) {
; CHECK-LABEL: @PR30630(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: ret <8 x i64> [[S7]]
+; CHECK-NEXT: ret <8 x i64> [[X:%.*]]
;
%s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
%s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index 54887e99ee380..b8e61a05cc0c7 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -61,4 +61,28 @@ define <16 x i32*> @test6() {
; CHECK-NEXT: ret <16 x i32*> getelementptr ([24 x [42 x [3 x i32]]], [24 x [42 x [3 x i32]]]* @v, <16 x i64> zeroinitializer, <16 x i64> zeroinitializer, <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, <16 x i64> zeroinitializer)
%VectorGep = getelementptr [24 x [42 x [3 x i32]]], [24 x [42 x [3 x i32]]]* @v, i64 0, i64 0, <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, i64 0
ret <16 x i32*> %VectorGep
-}
\ No newline at end of file
+}
+
+; PR32697
+; CHECK-LABEL: tinkywinky(
+; CHECK-NEXT: ret <4 x i8*> undef
+define <4 x i8*> @tinkywinky() {
+ %patatino = getelementptr i8, i8* undef, <4 x i64> undef
+ ret <4 x i8*> %patatino
+}
+
+; PR32697
+; CHECK-LABEL: dipsy(
+; CHECK-NEXT: ret <4 x i8*> undef
+define <4 x i8*> @dipsy() {
+ %patatino = getelementptr i8, <4 x i8 *> undef, <4 x i64> undef
+ ret <4 x i8*> %patatino
+}
+
+; PR32697
+; CHECK-LABEL: laalaa(
+; CHECK-NEXT: ret <4 x i8*> undef
+define <4 x i8*> @laalaa() {
+ %patatino = getelementptr i8, <4 x i8 *> undef, i64 undef
+ ret <4 x i8*> %patatino
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index a9d1e87587662..728f5dcac7b19 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -198,7 +198,7 @@ for.end: ; preds = %for.body
; @testNeon is an important example of the need for ivchains.
;
-; Currently we have three extra add.w's that keep the store address
+; Currently we have two extra add.w's that keep the store address
; live past the next increment because ISEL is unfortunately undoing
; the store chain. ISEL also fails to convert all but one of the stores to
; post-increment addressing. However, the loads should use
@@ -207,12 +207,10 @@ for.end: ; preds = %for.body
;
; A9: testNeon:
; A9: %.lr.ph
-; A9-NOT: lsl.w
-; A9-NOT: {{ldr|str|adds|add r}}
-; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
-; A9-NOT: {{ldr|str|adds|add r}}
; A9: add.w r
+; A9-NOT: lsl.w
; A9-NOT: {{ldr|str|adds|add r}}
+; A9: vst1.8 {{.*}} [r{{[0-9]+}}], r{{[0-9]+}}
; A9: add.w r
; A9-NOT: {{ldr|str|adds|add r}}
; A9-NOT: add.w r
diff --git a/test/Transforms/LoopUnroll/peel-loop-negative.ll b/test/Transforms/LoopUnroll/peel-loop-negative.ll
new file mode 100644
index 0000000000000..eab609a3002d1
--- /dev/null
+++ b/test/Transforms/LoopUnroll/peel-loop-negative.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=800 -unroll-peel-max-count=0 | FileCheck %s
+
+; We should not peel this loop even though we can, because the max count is set
+; to zero.
+define i32 @invariant_backedge_neg_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_neg_1
+; CHECK-NOT: loop.peel{{.*}}:
+; CHECK: loop:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK: %plus = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %b, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
diff --git a/test/Transforms/LoopUnroll/peel-loop-not-forced.ll b/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
index 3dcac87f8242f..8691481acc12f 100644
--- a/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
+++ b/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=4 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
define i32 @invariant_backedge_1(i32 %a, i32 %b) {
; CHECK-LABEL: @invariant_backedge_1
@@ -25,10 +25,112 @@ exit:
ret i32 %sum
}
-; Peeling should fail due to method size.
define i32 @invariant_backedge_2(i32 %a, i32 %b) {
+; This loop should be peeled twice because it has a Phi which becomes invariant
+; starting from the 3rd iteration.
; CHECK-LABEL: @invariant_backedge_2
-; CHECK-NOT: loop.peel:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %plus = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @invariant_backedge_3(i32 %a, i32 %b) {
+; This loop should be peeled thrice because it has a Phi which becomes invariant
+; starting from the 4th iteration.
+; CHECK-LABEL: @invariant_backedge_3
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+; CHECK-NOT: %plus = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
+; This loop should normally be peeled thrice because it has a Phi which becomes
+; invariant starting from the 4th iteration, but the size of the loop only allows
+; us to peel twice because we are restricted to 30 instructions in the resulting
+; code. Thus, the %plus Phi node should stay in the loop even though its backedge
+; input is invariant.
+; CHECK-LABEL: @invariant_backedge_limited_by_size
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK: %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ %incsum2 = add i32 %incsum, %plus
+ %incsum3 = add i32 %incsum, %plus
+ %incsum4 = add i32 %incsum, %plus
+ %incsum5 = add i32 %incsum, %plus
+ %incsum6 = add i32 %incsum, %plus
+ %incsum7 = add i32 %incsum, %plus
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+; Peeling should fail due to the size of the loop body.
+define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_negative
+; CHECK-NOT: loop.peel{{.*}}:
; CHECK: loop:
; CHECK: %i = phi
; CHECK: %sum = phi
@@ -43,6 +145,47 @@ loop:
%incsum = add i32 %sum, %plus
%incsum2 = add i32 %incsum, %plus
+ %incsum3 = add i32 %incsum, %plus
+ %incsum4 = add i32 %incsum, %plus
+ %incsum5 = add i32 %incsum, %plus
+ %incsum6 = add i32 %incsum, %plus
+ %incsum7 = add i32 %incsum, %plus
+ %incsum8 = add i32 %incsum, %plus
+ %incsum9 = add i32 %incsum, %plus
+ %incsum10 = add i32 %incsum, %plus
+ %incsum11 = add i32 %incsum, %plus
+ %incsum12 = add i32 %incsum, %plus
+ %incsum13 = add i32 %incsum, %plus
+ %incsum14 = add i32 %incsum, %plus
+ %incsum15 = add i32 %incsum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @cycled_phis(i32 %a, i32 %b) {
+; Make sure that we do not crash when working with cycled Phis, and that we do
+; not peel this loop.
+; TODO: Actually this loop should be partially unrolled with factor 2.
+; CHECK-LABEL: @cycled_phis
+; CHECK-NOT: loop.peel{{.*}}:
+; CHECK: loop:
+; CHECK: %i = phi
+; CHECK: %phi.a = phi
+; CHECK: %phi.b = phi
+; CHECK: %sum = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]
+ %phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]
+ %sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
+ %incsum = add i32 %sum, %phi.a
%inc = add i32 %i, 1
%cmp = icmp slt i32 %i, 1000
diff --git a/test/Transforms/NewGVN/non-integral-pointers.ll b/test/Transforms/NewGVN/non-integral-pointers.ll
new file mode 100644
index 0000000000000..75b8285d51f9a
--- /dev/null
+++ b/test/Transforms/NewGVN/non-integral-pointers.ll
@@ -0,0 +1,39 @@
+; RUN: opt -newgvn -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
+; CHECK-LABEL: @f0(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+ entry:
+ store i64 %val, i64* %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i64* %loc to i8 addrspace(4)**
+ %ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
+ store i8 5, i8 addrspace(4)* %ptr
+ ret void
+
+ alwaysTaken:
+ ret void
+}
+
+define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+; CHECK-LABEL: @f1(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+ entry:
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i64*
+ %int = load i64, i64* %loc.bc
+ ret i64 %int
+
+ alwaysTaken:
+ ret i64 42
+}
diff --git a/test/Transforms/PhaseOrdering/globalaa-retained.ll b/test/Transforms/PhaseOrdering/globalaa-retained.ll
index bce193b5e851b..47b8e4d7a9edd 100644
--- a/test/Transforms/PhaseOrdering/globalaa-retained.ll
+++ b/test/Transforms/PhaseOrdering/globalaa-retained.ll
@@ -5,6 +5,37 @@ target triple = "aarch64"
@v = internal unnamed_addr global i32 0, align 4
@p = common global i32* null, align 8
+
+; This test checks that a number of loads and stores, which can only be
+; eliminated based on GlobalsAA information, are indeed eliminated. As such,
+; it tests that GlobalsAA information is retained until the passes
+; that perform this optimization, and it protects against accidentally
+; dropping the GlobalsAA information earlier in the pipeline, which
+; has happened a few times.
+
+; GlobalsAA invalidation might happen later in the FunctionPassManager
+; pipeline than the optimization eliminating unnecessary loads/stores.
+; Since GlobalsAA is a module-level analysis, any FunctionPass
+; invalidating the GlobalsAA information will affect FunctionPass
+; pipelines that execute later. For example, assume a FunctionPass1 |
+; FunctionPass2 pipeline and 2 functions to be processed: f1 and f2.
+; Assume furthermore that FunctionPass1 uses GlobalsAA info to do an
+; optimization, and FunctionPass2 invalidates GlobalsAA. Assume the
+; function passes run in the following order: FunctionPass1(f1),
+; FunctionPass2(f1), FunctionPass1(f2), FunctionPass2(f2). Then
+; FunctionPass1 will not be able to optimize f2, since GlobalsAA will
+; have been invalidated in FunctionPass2(f1).
+
+; To try and also test this scenario, there is an empty function
+; before and after the function we're checking so that one of them
+; will be processed by the whole set of FunctionPasses before @f. That
+; will ensure that if the invalidation happens, it happens before the
+; actual optimizations on @f start.
+define void @bar() {
+entry:
+ ret void
+}
+
; Function Attrs: norecurse nounwind
define void @f(i32 %n) {
entry:
@@ -19,8 +50,17 @@ entry:
ret void
}
-; check variable v is loaded only once after optimization, which should be
-; prove that globalsAA survives until the optimization that can use it to
-; optimize away the duplicate load/stores on variable v.
+; Check that variable v is loaded/stored only once after optimization, which
+; should prove that GlobalsAA survives until the optimization that can use it
+; to optimize away the duplicate loads/stores on variable v.
; CHECK: load i32, i32* @v, align 4
+; CHECK: store i32 {{.*}}, i32* @v, align 4
; CHECK-NOT: load i32, i32* @v, align 4
+; CHECK-NOT: store i32 {{.*}}, i32* @v, align 4
+
+; Same as @bar above, in case the functions are processed in reverse order.
+define void @bar2() {
+entry:
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
new file mode 100644
index 0000000000000..f7f58d7350b30
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=corei7-avx | FileCheck %s
+
+%struct.complex = type { float, float }
+
+; CHECK-LABEL: void @foo
+define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %Result) {
+
+entry:
+ %0 = add i64 256, 0
+ br label %loop
+
+; CHECK-LABEL: loop
+; CHECK: [[REG0:%[0-9]+]] = phi <2 x float> {{.*}}[ [[REG1:%[0-9]+]], %loop ]
+; CHECK: [[REG2:%[0-9]+]] = load <2 x float>, <2 x float>*
+; CHECK: [[REG3:%[0-9]+]] = fmul <2 x float> [[REG2]]
+; CHECK: [[REG4:%[0-9]+]] = fmul <2 x float>
+; CHECK: fsub <2 x float> [[REG3]], [[REG4]]
+; CHECK: fadd <2 x float> [[REG3]], [[REG4]]
+; CHECK: shufflevector <2 x float>
+; CHECK: [[REG1]] = fadd <2 x float>{{.*}}[[REG0]]
+loop:
+
+ %1 = phi i64 [ 0, %entry ], [ %20, %loop ]
+ %2 = phi float [ 0.000000e+00, %entry ], [ %19, %loop ]
+ %3 = phi float [ 0.000000e+00, %entry ], [ %18, %loop ]
+ %4 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 0
+ %5 = load float, float* %4, align 4
+ %6 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 1
+ %7 = load float, float* %6, align 4
+ %8 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 0
+ %9 = load float, float* %8, align 4
+ %10 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 1
+ %11 = load float, float* %10, align 4
+ %12 = fmul float %5, %9
+ %13 = fmul float %7, %11
+ %14 = fsub float %12, %13
+ %15 = fmul float %7, %9
+ %16 = fmul float %5, %11
+ %17 = fadd float %15, %16
+ %18 = fadd float %3, %14
+ %19 = fadd float %2, %17
+ %20 = add nuw nsw i64 %1, 1
+ %21 = icmp eq i64 %20, %0
+ br i1 %21, label %exit, label %loop
+
+exit:
+ %22 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result, i32 0, i32 0
+ store float %18, float* %22, align 4
+ %23 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result, i32 0, i32 1
+ store float %19, float* %23, align 4
+
+ ret void
+
+}
diff --git a/test/Transforms/SafeStack/X86/debug-loc.ll b/test/Transforms/SafeStack/X86/debug-loc.ll
index fc0b6f911f7ee..88cda693b2932 100644
--- a/test/Transforms/SafeStack/X86/debug-loc.ll
+++ b/test/Transforms/SafeStack/X86/debug-loc.ll
@@ -37,10 +37,10 @@ entry:
; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz"
; 100 aligned up to 8
-; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_deref, DW_OP_minus, 104
+; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_minus, 104)
; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx"
-; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_deref, DW_OP_minus, 208
+; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_minus, 208)
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
diff --git a/test/Transforms/SampleProfile/Inputs/indirect-call.prof b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
index 428d4cedef5a8..aaf9ec15d02e3 100644
--- a/test/Transforms/SampleProfile/Inputs/indirect-call.prof
+++ b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
@@ -11,3 +11,9 @@ test_noinline:3000:0
test_direct:3000:0
5: foo_direct:3000
1: 3000
+test_inline_strip:3000:0
+ 5: foo_inline_strip:3000
+ 1: 3000
+test_inline_strip_conflict:3000:0
+ 5: foo_inline_strip_conflict:3000
+ 1: 3000
diff --git a/test/Transforms/SampleProfile/indirect-call.ll b/test/Transforms/SampleProfile/indirect-call.ll
index 5a4913d6358f5..4647dd4212998 100644
--- a/test/Transforms/SampleProfile/indirect-call.ll
+++ b/test/Transforms/SampleProfile/indirect-call.ll
@@ -29,6 +29,34 @@ define void @test_inline(i64* (i32*)*, i32* %x) !dbg !3 {
ret void
}
+; CHECK-LABEL: @test_inline_strip
+; If the indirect call is promoted and inlined in the profile, and the
+; callee name is stripped, we should promote and inline it.
+define void @test_inline_strip(i64* (i32*)*, i32* %x) !dbg !3 {
+ %2 = alloca i64* (i32*)*
+ store i64* (i32*)* %0, i64* (i32*)** %2
+ %3 = load i64* (i32*)*, i64* (i32*)** %2
+; CHECK: icmp {{.*}} @foo_inline_strip.suffix
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: call
+ call i64* %3(i32* %x), !dbg !5
+ ret void
+}
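+; The CHECK lines above match, roughly, the promoted-and-inlined form
+; (a sketch; the labels come from the promotion pass, and the compare
+; operand is illustrative rather than the exact IR emitted):
+;   %cmp = icmp eq i64* (i32*)* %3, @foo_inline_strip.suffix
+;   br i1 %cmp, label %if.true.direct_targ, label %if.false.orig_indirect
+;   if.true.direct_targ:                ; inlined @foo_inline_strip.suffix
+;   if.false.orig_indirect:
+;     call i64* %3(i32* %x)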
+
+; CHECK-LABEL: @test_inline_strip_conflict
+; If the indirect call is promoted and inlined in the profile, but the
+; stripped callee name has more than one potential match, we should not
+; promote it.
+define void @test_inline_strip_conflict(i64* (i32*)*, i32* %x) !dbg !3 {
+ %2 = alloca i64* (i32*)*
+ store i64* (i32*)* %0, i64* (i32*)** %2
+ %3 = load i64* (i32*)*, i64* (i32*)** %2
+; CHECK-NOT: if.true.direct_targ:
+ call i64* %3(i32* %x), !dbg !5
+ ret void
+}
+
; CHECK-LABEL: @test_noinline
; If the indirect call target is not available, we should not promote it.
define void @test_noinline(void ()*) !dbg !3 {
@@ -47,6 +75,22 @@ define i32* @foo_inline1(i32* %x) !dbg !3 {
ret i32* %x
}
+define i32* @foo_inline_strip.suffix(i32* %x) !dbg !3 {
+ ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix1(i32* %x) !dbg !3 {
+ ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix2(i32* %x) !dbg !3 {
+ ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix3(i32* %x) !dbg !3 {
+ ret i32* %x
+}
+
define i32* @foo_inline2(i32* %x) !dbg !3 {
ret i32* %x
}
diff --git a/test/Transforms/StructurizeCFG/invert-compare.ll b/test/Transforms/StructurizeCFG/invert-compare.ll
new file mode 100644
index 0000000000000..87d9c6d105694
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/invert-compare.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -structurizecfg %s | FileCheck %s
+
+; CHECK-LABEL: @directly_invert_compare_condition_jump_into_loop(
+; CHECK: %cmp0 = fcmp uge float %arg0, %arg1
+; CHECK-NEXT: br i1 %cmp0, label %end.loop, label %Flow
+define void @directly_invert_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i
+ store i32 %i, i32 addrspace(1)* %ptr, align 4
+ %cmp0 = fcmp olt float %arg0, %arg1
+ br i1 %cmp0, label %mid.loop, label %end.loop
+
+mid.loop:
+ store i32 333, i32 addrspace(1)* %out, align 4
+ br label %for.end
+
+end.loop:
+ %i.inc = add i32 %i, 1
+ %cmp = icmp ne i32 %i.inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: @invert_multi_use_compare_condition_jump_into_loop(
+; CHECK: %cmp0 = fcmp olt float %arg0, %arg1
+; CHECK: store volatile i1 %cmp0, i1 addrspace(1)* undef
+; CHECK: %0 = xor i1 %cmp0, true
+; CHECK-NEXT: br i1 %0, label %end.loop, label %Flow
+define void @invert_multi_use_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i
+ store i32 %i, i32 addrspace(1)* %ptr, align 4
+ %cmp0 = fcmp olt float %arg0, %arg1
+ store volatile i1 %cmp0, i1 addrspace(1)* undef
+ br i1 %cmp0, label %mid.loop, label %end.loop
+
+mid.loop:
+ store i32 333, i32 addrspace(1)* %out, align 4
+ br label %for.end
+
+end.loop:
+ %i.inc = add i32 %i, 1
+ %cmp = icmp ne i32 %i.inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+attributes #0 = { nounwind }
\ No newline at end of file
diff --git a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
index 668a1e99d814d..aff59642cbcb4 100644
--- a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
+++ b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
@@ -11,8 +11,8 @@ bb:
bb3: ; preds = %bb7, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
%tmp4 = fcmp ult float %arg1, 3.500000e+00
-; CHECK: %0 = xor i1 %tmp4, true
-; CHECK: br i1 %0, label %bb5, label %Flow
+; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00
+; CHECK: br i1 %tmp4, label %bb5, label %Flow
br i1 %tmp4, label %bb7, label %bb5
; CHECK: bb5:
@@ -22,7 +22,8 @@ bb5: ; preds = %bb3
br i1 %tmp6, label %bb10, label %bb7
; CHECK: Flow:
-; CHECK: br i1 %3, label %bb7, label %Flow1
+; CHECK: %1 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ]
+; CHECK-NEXT: br i1 %1, label %bb7, label %Flow1
; CHECK: bb7
bb7: ; preds = %bb5, %bb3
@@ -32,9 +33,10 @@ bb7: ; preds = %bb5, %bb3
br i1 %tmp9, label %bb3, label %bb10
; CHECK: Flow1:
-; CHECK: br i1 %7, label %bb10, label %bb3
+; CHECK: %4 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ]
+; CHECK-NEXT: br i1 %4, label %bb10, label %bb3
-; CHECK: bb10
+; CHECK: bb10:
bb10: ; preds = %bb7, %bb5
%tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
store i32 %tmp11, i32 addrspace(1)* %arg, align 4
diff --git a/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
index ba9aa29130611..a8835f19d447f 100644
--- a/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
+++ b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
@@ -59,7 +59,8 @@ for.end: ; preds = %for.body.1, %if.the
; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
; CHECK: for.body.1:
-; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
+; CHECK: %cmp1.5 = icmp ne i32 %tmp22, %K1
+; CHECK-NEXT: br i1 %cmp1.5, label %for.body.6, label %Flow3
for.body.1: ; preds = %if.then, %lor.lhs.false
%best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
%best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
diff --git a/test/tools/gold/X86/thinlto.ll b/test/tools/gold/X86/thinlto.ll
index 5e1d913eb09e7..bb87adc44745b 100644
--- a/test/tools/gold/X86/thinlto.ll
+++ b/test/tools/gold/X86/thinlto.ll
@@ -82,15 +82,11 @@
; BACKEND1-NEXT: </MODULE_STRTAB_BLOCK
; BACKEND1-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; BACKEND1-NEXT: <VERSION
+; BACKEND1-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
+; BACKEND1-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
; BACKEND1-NEXT: <COMBINED
; BACKEND1-NEXT: <COMBINED
; BACKEND1-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; BACKEND1-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; BACKEND1-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; BACKEND1-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; BACKEND1-NEXT: </VALUE_SYMTAB
; The backend index for Input/thinlto.ll contains summaries from itself only,
; as it does not import anything.
@@ -99,13 +95,9 @@
; BACKEND2-NEXT: </MODULE_STRTAB_BLOCK
; BACKEND2-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; BACKEND2-NEXT: <VERSION
+; BACKEND2-NEXT: <VALUE_GUID op0=1 op1=-5300342847281564238
; BACKEND2-NEXT: <COMBINED
; BACKEND2-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; BACKEND2-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; BACKEND2-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0=1 op1=-5300342847281564238
-; BACKEND2-NEXT: </VALUE_SYMTAB
; COMBINED: <MODULE_STRTAB_BLOCK
; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
@@ -113,15 +105,11 @@
; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
; COMBINED-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: <COMBINED
; COMBINED-NEXT: <COMBINED
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; COMBINED-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; COMBINED-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: </VALUE_SYMTAB
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/tools/llvm-lto/thinlto.ll b/test/tools/llvm-lto/thinlto.ll
index 61c52b33e72e8..86aca13a4c884 100644
--- a/test/tools/llvm-lto/thinlto.ll
+++ b/test/tools/llvm-lto/thinlto.ll
@@ -11,15 +11,11 @@
; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
; COMBINED-NEXT: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <VALUE_GUID op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: <COMBINED
; COMBINED-NEXT: <COMBINED
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
-; COMBINED-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
-; where funcguid is the lower 64 bits of the function name MD5.
-; COMBINED-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: <COMBINED_ENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: </VALUE_SYMTAB
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/tools/llvm-symbolizer/Inputs/discrim b/test/tools/llvm-symbolizer/Inputs/discrim
index ec61fe960bffb..0e5e847a5ed71 100644
--- a/test/tools/llvm-symbolizer/Inputs/discrim
+++ b/test/tools/llvm-symbolizer/Inputs/discrim
Binary files differ
diff --git a/test/tools/llvm-symbolizer/Inputs/discrim.c b/test/tools/llvm-symbolizer/Inputs/discrim.c
index decbce8d454e6..e53cbd4671390 100644
--- a/test/tools/llvm-symbolizer/Inputs/discrim.c
+++ b/test/tools/llvm-symbolizer/Inputs/discrim.c
@@ -1,8 +1,11 @@
static volatile int do_mul;
-static volatile int do_inc;
+static volatile int x, v;
-int main () {
- int x = 1;
- if (do_mul) x *= 2; else x /= 2;
- return do_inc ? ++x : --x;
+int foo () {
+ if (do_mul) x *= v; else x /= v;
+ return x;
+}
+
+int main() {
+ return foo() + foo();
}
diff --git a/test/tools/llvm-symbolizer/Inputs/discrim.inp b/test/tools/llvm-symbolizer/Inputs/discrim.inp
index f8ad6018d7092..a5cfcb2558f35 100644
--- a/test/tools/llvm-symbolizer/Inputs/discrim.inp
+++ b/test/tools/llvm-symbolizer/Inputs/discrim.inp
@@ -1,5 +1,8 @@
some text
-0x4004f2
-0x400509
-0x40050d
+0x400590
+0x4005a5
+0x4005ad
+0x4005b9
+0x4005ce
+0x4005d4
some more text
diff --git a/test/tools/llvm-symbolizer/padding-x86_64.ll b/test/tools/llvm-symbolizer/padding-x86_64.ll
new file mode 100644
index 0000000000000..114c9f701c680
--- /dev/null
+++ b/test/tools/llvm-symbolizer/padding-x86_64.ll
@@ -0,0 +1,40 @@
+; REQUIRES: x86_64-linux
+; Checks that the symbolizer can correctly symbolize an address in the
+; padding between functions.
+; RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %s
+; RUN: echo 0x5 | llvm-symbolizer -obj=%t.o | FileCheck %s --check-prefix=FOO
+; RUN: echo 0xd | llvm-symbolizer -obj=%t.o | FileCheck %s --check-prefix=PADDING
+; RUN: echo 0x10 | llvm-symbolizer -obj=%t.o | FileCheck %s --check-prefix=MAIN
+
+;FOO: foo
+;PADDING: ??
+;MAIN: main
+
+@a = global i32 1, align 4
+
+define i32 @foo() !dbg !9 {
+entry:
+ %0 = load i32, i32* @a, align 4
+ ret i32 %0
+}
+
+define i32 @main() !dbg !14 {
+entry:
+ %call = call i32 @foo(), !dbg !18
+ ret i32 %call
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "padding-x86_64.c", directory: "/tmp/")
+!2 = !{}
+!5 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !10, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!5}
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 6, type: !10, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !0, variables: !2)
+!18 = !DILocation(line: 7, column: 8, scope: !14)
diff --git a/test/tools/llvm-symbolizer/sym-verbose.test b/test/tools/llvm-symbolizer/sym-verbose.test
index ef66db919faae..5b401e3b0982f 100644
--- a/test/tools/llvm-symbolizer/sym-verbose.test
+++ b/test/tools/llvm-symbolizer/sym-verbose.test
@@ -1,39 +1,97 @@
#static volatile int do_mul;
-#static volatile int do_inc;
+#static volatile int x, v;
#
-#int main () {
-# int x = 1;
-# if (do_mul) x *= 2; else x /= 2;
-# return do_inc ? ++x : --x;
+#int foo () {
+# if (do_mul) x *= v; else x /= v;
+# return x;
#}
-#Build as : clang -g -O2 discrim.c -o discrim
+#
+#int main() {
+# return foo() + foo();
+#}
+#Build as : clang -gmlt -fdebug-info-for-profiling -O2 discrim.c -o discrim
RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/discrim.inp | FileCheck %s
#CHECK: some text
-#CHECK: 0x4004f2
+#CHECK: 0x400590
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 4
+#CHECK-NEXT: Line: 9
+#CHECK-NEXT: Column: 0
#CHECK-NEXT: main
#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
+
+#CHECK: 0x4005a5
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
#CHECK-NEXT: Function start line: 4
-#CHECK-NEXT: Line: 6
-#CHECK-NEXT: Column: 7
-#CHECK-NOT: Discriminator: 0
+#CHECK-NEXT: Line: 5
+#CHECK-NEXT: Column: 17
+#CHECK-NEXT: Discriminator: 2
+#CHECK-NEXT: main
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
-#CHECK: 0x400509
+#CHECK: 0x4005ad
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 4
+#CHECK-NEXT: Line: 0
+#CHECK-NEXT: Column: 30
+#CHECK-NEXT: Discriminator: 4
#CHECK-NEXT: main
#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
+
+#CHECK: 0x4005b9
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
#CHECK-NEXT: Function start line: 4
-#CHECK-NEXT: Line: 7
-#CHECK-NEXT: Column: 3
-#CHECK-NEXT: Discriminator: 1
+#CHECK-NEXT: Line: 5
+#CHECK-NEXT: Column: 7
+#CHECK-NEXT: main
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
+#CHECK-NEXT: Discriminator: 2
-#CHECK: 0x40050d
+#CHECK: 0x4005ce
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 4
+#CHECK-NEXT: Line: 5
+#CHECK-NEXT: Column: 17
+#CHECK-NEXT: Discriminator: 2
#CHECK-NEXT: main
#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
+#CHECK-NEXT: Discriminator: 2
+
+#CHECK: 0x4005d4
+#CHECK-NEXT: foo
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
#CHECK-NEXT: Function start line: 4
-#CHECK-NEXT: Line: 7
-#CHECK-NEXT: Column: 3
+#CHECK-NEXT: Line: 5
+#CHECK-NEXT: Column: 30
+#CHECK-NEXT: Discriminator: 4
+#CHECK-NEXT: main
+#CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c
+#CHECK-NEXT: Function start line: 9
+#CHECK-NEXT: Line: 10
+#CHECK-NEXT: Column: 0
#CHECK-NEXT: Discriminator: 2
#CHECK: some more text
diff --git a/test/tools/llvm-xray/X86/extract-instrmap-symbolize.ll b/test/tools/llvm-xray/X86/extract-instrmap-symbolize.ll
new file mode 100644
index 0000000000000..86358ca5c13f0
--- /dev/null
+++ b/test/tools/llvm-xray/X86/extract-instrmap-symbolize.ll
@@ -0,0 +1,10 @@
+; This tests that we can extract the instrumentation map and symbolize the
+; function addresses.
+; RUN: llvm-xray extract %S/Inputs/elf64-example.bin -s | FileCheck %s
+
+; CHECK: ---
+; CHECK-NEXT: - { id: 1, address: 0x000000000041C900, function: 0x000000000041C900, kind: function-enter, always-instrument: true, function-name: {{.*foo.*}} }
+; CHECK-NEXT: - { id: 1, address: 0x000000000041C912, function: 0x000000000041C900, kind: function-exit, always-instrument: true, function-name: {{.*foo.*}} }
+; CHECK-NEXT: - { id: 2, address: 0x000000000041C930, function: 0x000000000041C930, kind: function-enter, always-instrument: true, function-name: {{.*bar.*}} }
+; CHECK-NEXT: - { id: 2, address: 0x000000000041C946, function: 0x000000000041C930, kind: function-exit, always-instrument: true, function-name: {{.*bar.*}} }
+; CHECK-NEXT: ...
diff --git a/test/tools/llvm-xray/X86/extract-instrmap.ll b/test/tools/llvm-xray/X86/extract-instrmap.ll
index 7447aec681144..c036944bd3820 100644
--- a/test/tools/llvm-xray/X86/extract-instrmap.ll
+++ b/test/tools/llvm-xray/X86/extract-instrmap.ll
@@ -4,8 +4,8 @@
; RUN: llvm-xray extract %S/Inputs/elf64-example.bin | FileCheck %s
; CHECK: ---
-; CHECK-NEXT: - { id: 1, address: 0x000000000041C900, function: 0x000000000041C900, kind: function-enter, always-instrument: true }
-; CHECK-NEXT: - { id: 1, address: 0x000000000041C912, function: 0x000000000041C900, kind: function-exit, always-instrument: true }
-; CHECK-NEXT: - { id: 2, address: 0x000000000041C930, function: 0x000000000041C930, kind: function-enter, always-instrument: true }
-; CHECK-NEXT: - { id: 2, address: 0x000000000041C946, function: 0x000000000041C930, kind: function-exit, always-instrument: true }
+; CHECK-NEXT: - { id: 1, address: 0x000000000041C900, function: 0x000000000041C900, kind: function-enter, always-instrument: true{{.*}} }
+; CHECK-NEXT: - { id: 1, address: 0x000000000041C912, function: 0x000000000041C900, kind: function-exit, always-instrument: true{{.*}} }
+; CHECK-NEXT: - { id: 2, address: 0x000000000041C930, function: 0x000000000041C930, kind: function-enter, always-instrument: true{{.*}} }
+; CHECK-NEXT: - { id: 2, address: 0x000000000041C946, function: 0x000000000041C930, kind: function-exit, always-instrument: true{{.*}} }
; CHECK-NEXT: ...