diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 |
commit | d39c594d39df7f283c2fb8a704a3f31c501180d9 (patch) | |
tree | 36453626c792cccd91f783a38a169d610a6b9db9 /test | |
parent | 6144c1de6a7674dad94290650e4e14f24d42e421 (diff) |
Notes
Diffstat (limited to 'test')
443 files changed, 18712 insertions, 8378 deletions
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll index 5d3f67ebe1aec..7555a4c2a9b06 100644 --- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll +++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll @@ -168,132 +168,132 @@ define void @caller_a(double* %arg_a0, ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0 ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1 ; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1 -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: 
Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call 
double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; 
<double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; 
CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: 
NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 
= call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call 
double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> 
call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: 
NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call 
void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* 
%noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: 
ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: ===== Alias Analysis Evaluator Report ===== @@ -302,9 +302,9 @@ define void @caller_a(double* %arg_a0, ; CHECK: 36 may alias responses (30.0%) ; CHECK: 0 must alias responses (0.0%) ; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0% -; CHECK: 128 Total ModRef Queries 
Performed -; CHECK: 44 no mod/ref responses (34.3%) +; CHECK: 184 Total ModRef Queries Performed +; CHECK: 44 no mod/ref responses (23.9%) ; CHECK: 0 mod responses (0.0%) ; CHECK: 0 ref responses (0.0%) -; CHECK: 84 mod & ref responses (65.6%) -; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 34%/0%/0%/65% +; CHECK: 140 mod & ref responses (76.0%) +; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76% diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll index 95f94d096f35f..0e0c45c8ad5cb 100644 --- a/test/Analysis/BasicAA/constant-over-index.ll +++ b/test/Analysis/BasicAA/constant-over-index.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -aa-eval -print-all-alias-modref-info \ -; RUN: |& grep {MayAlias: double\\* \[%\]p.0.i.0, double\\* \[%\]p3\$} +; RUN: opt < %s -aa-eval -print-all-alias-modref-info |& FileCheck %s ; PR4267 +; CHECK: MayAlias: double* %p.0.i.0, double* %p3 + ; %p3 is equal to %p.0.i.0 on the second iteration of the loop, ; so MayAlias is needed. diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll index 50dc8864ac9b0..47d278fab1c2a 100644 --- a/test/Analysis/BasicAA/featuretest.ll +++ b/test/Analysis/BasicAA/featuretest.ll @@ -1,17 +1,22 @@ ; This testcase tests for various features the basicaa test should be able to ; determine, as noted in the comments. -; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE +; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @Global = external global { i32 } +declare void @external(i32*) + ; Array test: Test that operations on one local array do not invalidate ; operations on another array. Important for scientific codes. 
; define i32 @different_array_test(i64 %A, i64 %B) { %Array1 = alloca i32, i32 100 %Array2 = alloca i32, i32 200 + + call void @external(i32* %Array1) + call void @external(i32* %Array2) %pointer = getelementptr i32* %Array1, i64 %A %val = load i32* %pointer @@ -22,6 +27,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { %REMOVE = load i32* %pointer ; redundant with above load %retval = sub i32 %REMOVE, %val ret i32 %retval +; CHECK: @different_array_test +; CHECK: ret i32 0 } ; Constant index test: Constant indexes into the same array should not @@ -29,6 +36,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { ; define i32 @constant_array_index_test() { %Array = alloca i32, i32 100 + call void @external(i32* %Array) + %P1 = getelementptr i32* %Array, i64 7 %P2 = getelementptr i32* %Array, i64 6 @@ -37,6 +46,8 @@ define i32 @constant_array_index_test() { %BREMOVE = load i32* %P1 %Val = sub i32 %A, %BREMOVE ret i32 %Val +; CHECK: @constant_array_index_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant getelementptr, that @@ -48,6 +59,8 @@ define i32 @gep_distance_test(i32* %A) { %REMOVEv = load i32* %A %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant offset, that they @@ -60,6 +73,8 @@ define i32 @gep_distance_test2({i32,i32}* %A, i64 %distance) { %REMOVEv = load i32* %A1 %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test2 +; CHECK: ret i32 0 } ; Test that we can do funny pointer things and that distance calc will still @@ -68,16 +83,45 @@ define i32 @gep_distance_test3(i32 * %A) { %X = load i32* %A %B = bitcast i32* %A to i8* %C = getelementptr i8* %B, i64 4 - %Y = load i8* %C - ret i32 8 + store i8 42, i8* %C + %Y = load i32* %A + %R = sub i32 %X, %Y + ret i32 %R +; CHECK: @gep_distance_test3 +; CHECK: ret i32 0 } ; Test that we can disambiguate globals reached through constantexpr geps define i32 
@constexpr_test() { %X = alloca i32 + call void @external(i32* %X) + %Y = load i32* %X store i32 5, i32* getelementptr ({ i32 }* @Global, i64 0, i32 0) %REMOVE = load i32* %X %retval = sub i32 %Y, %REMOVE ret i32 %retval +; CHECK: @constexpr_test +; CHECK: ret i32 0 +} + + + +; PR7589 +; These two index expressions are different, this cannot be CSE'd. +define i16 @zext_sext_confusion(i16* %row2col, i5 %j) nounwind{ +entry: + %sum5.cast = zext i5 %j to i64 ; <i64> [#uses=1] + %P1 = getelementptr i16* %row2col, i64 %sum5.cast + %row2col.load.1.2 = load i16* %P1, align 1 ; <i16> [#uses=1] + + %sum13.cast31 = sext i5 %j to i6 ; <i6> [#uses=1] + %sum13.cast = zext i6 %sum13.cast31 to i64 ; <i64> [#uses=1] + %P2 = getelementptr i16* %row2col, i64 %sum13.cast + %row2col.load.1.6 = load i16* %P2, align 1 ; <i16> [#uses=1] + + %.ret = sub i16 %row2col.load.1.6, %row2col.load.1.2 ; <i16> [#uses=1] + ret i16 %.ret +; CHECK: @zext_sext_confusion +; CHECK: ret i16 %.ret } diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll index 1ed031224713c..eba9599ba07bb 100644 --- a/test/Analysis/BasicAA/gep-alias.ll +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -117,12 +117,12 @@ define i32 @test7(i32* %p, i64 %i) { ; P[zext(i)] != p[zext(i+1)] ; PR1143 -define i32 @test8(i32* %p, i32 %i) { - %i1 = zext i32 %i to i64 - %pi = getelementptr i32* %p, i64 %i1 - %i.next = add i32 %i, 1 - %i.next2 = zext i32 %i.next to i64 - %pi.next = getelementptr i32* %p, i64 %i.next2 +define i32 @test8(i32* %p, i16 %i) { + %i1 = zext i16 %i to i32 + %pi = getelementptr i32* %p, i32 %i1 + %i.next = add i16 %i, 1 + %i.next2 = zext i16 %i.next to i32 + %pi.next = getelementptr i32* %p, i32 %i.next2 %x = load i32* %pi store i32 42, i32* %pi.next %y = load i32* %pi diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll new file mode 100644 index 0000000000000..12b088b1f6518 --- /dev/null +++ 
b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s + + +; CHECK: Just Ref: call void @ro() <-> call void @f0() + +declare void @f0() +declare void @ro() readonly + +define void @test0() { + call void @f0() + call void @ro() + ret void +} + +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +@A = external global i8 +@B = external global i8 +define void @test1() { + call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1) + call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1) + ret void +} diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index a2aabf135f6f0..b9a3c5e58f68e 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -123,3 +123,14 @@ define i32 @test5(i8* %P, i32 %Len) { ; CHECK: sub i32 %tmp, %tmp } +define i8 @test6(i8* %p, i8* noalias %a) { + %x = load i8* %a + %t = va_arg i8* %p, float + %y = load i8* %a + %z = add i8 %x, %y + ret i8 %z +; CHECK: @test6 +; CHECK: load i8* %a +; CHECK-NOT: load +; CHECK: ret +} diff --git a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll new file mode 100644 index 0000000000000..218b4375f70cc --- /dev/null +++ b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions %s +define i32 @main() nounwind { +entry: + br label %for.cond + +test: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + br i1 true, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + br label 
%for.cond + +for.end: ; preds = %for.cond + ret i32 0 +} diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll new file mode 100644 index 0000000000000..faec45a911f52 --- /dev/null +++ b/test/Analysis/RegionInfo/block_sort.ll @@ -0,0 +1,42 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @BZ2_blockSort() nounwind { +start: + br label %while + +while: + br label %while.body134.i.i + +while.body134.i.i: + br i1 1, label %end, label %w + +w: + br label %if.end140.i.i + +if.end140.i.i: + br i1 1, label %while.end186.i.i, label %if.end183.i.i + +if.end183.i.i: + br label %while.body134.i.i + +while.end186.i.i: + br label %while + +end: + ret void +} +; CHECK-NOT: => +; CHECK: [0] start => <Function Return> +; CHECK: [1] while => end + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: start, while, while.body134.i.i, end, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, +; BBIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, + +; RNIT: start, while => end, end, +; RNIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll new file mode 100644 index 0000000000000..2ce57c3c5f37c --- /dev/null +++ b/test/Analysis/RegionInfo/cond_loop.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void 
@normal_condition() nounwind { +5: + br label %"0" + +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + ret void +3: + br i1 1, label %"1", label %"4" +4: + br label %"0" +} + +; CHECK-NOT: => +; CHECK: [0] 5 => <Function Return> +; CHECK: [1] 0 => 2 + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 5, 0, 1, 2, 3, 4, +; BBIT: 0, 1, 3, 4, + +; RNIT: 5, 0 => 2, 2, +; RNIT: 0, 1, 3, 4, diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll new file mode 100644 index 0000000000000..7ca5c7c7b5370 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated.ll @@ -0,0 +1,60 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +end165: + br i1 1, label %false239, label %true181 + +true181: + br i1 1, label %then187, label %else232 + +then187: + br label %end265 + +else232: + br i1 1, label %false239, label %then245 + +false239: + br i1 1, label %then245, label %else259 + +then245: + br i1 1, label %then251, label %end253 + +then251: + br label %end253 + +end253: + br label %end265 + +else259: + br label %end265 + +end265: + br i1 1, label %then291, label %end298 + +then291: + br label %end298 + +end298: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] end165 => <Function Return> +; CHECK-NEXT: [1] end165 => end265 +; CHECK-NEXT: [2] then245 => end253 +; CHECK-NEXT: [1] end265 => end298 + +; STAT: 4 region - The # of regions + +; BBIT: end165, false239, then245, then251, end253, end265, then291, end298, else259, true181, then187, else232, +; BBIT: end165, false239, then245, then251, end253, else259, true181, then187, else232, +; BBIT: 
then245, then251, +; BBIT: end265, then291, + +; RNIT: end165 => end265, end265 => end298, end298, +; RNIT: end165, false239, then245 => end253, end253, else259, true181, then187, else232, +; RNIT: then245, then251, +; RNIT: end265, then291, diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll new file mode 100644 index 0000000000000..5fa940a61ef64 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -0,0 +1,44 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc void @compress() nounwind { +end33: + br i1 1, label %end124, label %lor.lhs.false95 + +lor.lhs.false95: + br i1 1, label %then107, label %end172 + +then107: + br i1 1, label %end124, label %then113 + +then113: + br label %end124 + +end124: + br label %exit + +end172: + br label %exit + + +exit: + unreachable + + +} +; CHECK-NOT: => +; CHECK: [0] end33 => <Function Return> +; CHECK-NEXT: [1] end33 => exit +; CHECK-NEXT: [2] then107 => end124 + +; STAT: 3 region - The # of regions + +; BBIT: end33, end124, exit, lor.lhs.false95, then107, then113, end172, +; BBIT: end33, end124, lor.lhs.false95, then107, then113, end172, +; BBIT: then107, then113, + +; RNIT: end33 => exit, exit, +; RNIT: end33, end124, lor.lhs.false95, then107 => end124, end172, +; RNIT: then107, then113, diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll new file mode 100644 index 0000000000000..098c9b6b46131 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; 
RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"3" +3: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 + +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, + +; RNIT: 0, 1 => 3, 3, +; RNIT: 1, 2, diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll new file mode 100644 index 0000000000000..1b88596c0f8c4 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_same_exit.ll @@ -0,0 +1,31 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br i1 1, label %"1", label %"4" + +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 4 +; CHECK-NEXT: [2] 1 => 4 +; STAT: 3 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0 => 4, 4, +; RNIT: 0, 1 => 4, +; RNIT: 1, 2, 3, diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll new file mode 100644 index 0000000000000..19b154b6476b8 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_simple.ll @@ -0,0 +1,28 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions 
-print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 4 +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0, 1 => 4, 4, +; RNIT: 1, 2, 3, diff --git a/test/Transforms/ABCD/dg.exp b/test/Analysis/RegionInfo/dg.exp index f2005891a59a8..f2005891a59a8 100644 --- a/test/Transforms/ABCD/dg.exp +++ b/test/Analysis/RegionInfo/dg.exp diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll new file mode 100644 index 0000000000000..3b152d2f565db --- /dev/null +++ b/test/Analysis/RegionInfo/exit_in_condition.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %body.i, label %if.end + +body.i: + br i1 1, label %end, label %if.end + +if.end: + br label %if.then64 + +if.then64: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, body.i, end, if.end, if.then64, +; BBIT: outer, body, body.i, if.end, if.then64, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, body.i, if.end, if.then64, diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll new file mode 100644 index 
0000000000000..59cead492619e --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 4 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll new file mode 100644 index 0000000000000..80c69b7ab2e22 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -0,0 +1,36 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 5, 11, 6, 12, 3, 4, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 1 => 3, 3, 4, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll new file mode 100644 index 0000000000000..74ceafb84955e --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -0,0 +1,52 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb 
-analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 3 +; CHECK-NEXT: [1] 7 => 1 +; STAT: 3 region - The # of regions +; STAT: 2 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 12, 3, 4, 8, 9, 13, 10, 14, +; BBIT: 7, 8, 9, 13, 10, 14, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 7 => 1, 1 => 3, 3, 4, +; RNIT: 7, 8, 9, 13, 10, 14, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll new file mode 100644 index 0000000000000..fd56af1d3b8c2 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -0,0 +1,48 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br i1 1, label %"2", label %"10" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> 
+; CHECK-NEXT: [1] 7 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, 3, 4, +; BBIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, + +; RNIT: 0, 7 => 3, 3, 4, +; RNIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll new file mode 100644 index 0000000000000..d1d68982eec66 --- /dev/null +++ b/test/Analysis/RegionInfo/loop_with_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"6", label %"2" +2: + br i1 1, label %"3", label %"4" +3: + br label %"5" +4: + br label %"5" +5: + br label %"8" +8: + br i1 1, label %"7", label %"9" +9: + br label %"2" +7: + br label %"6" +6: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 6 +; CHECK-NEXT: [2] 2 => 7 +; CHECK-NEXT: [3] 2 => 5 +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 6, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 1, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 2, 3, 5, 8, 9, 4, +; BBIT: 2, 3, 4, + +; RNIT: 0, 1 => 6, 6, +; RNIT: 1, 2 => 7, 7, +; RNIT: 2 => 5, 5, 8, 9, +; RNIT: 2, 3, 4, diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll new file mode 100644 index 0000000000000..d4bf3cc501182 --- /dev/null +++ b/test/Analysis/RegionInfo/loops_1.ll @@ -0,0 +1,40 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck 
-check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @loops_1() nounwind { +entry: + br i1 1, label %outer , label %a + +a: + br label %body + +outer: + br label %body + +body: + br i1 1, label %land, label %if + +land: + br i1 1, label %exit, label %end + +exit: + br i1 1, label %if, label %end + +if: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] entry => end +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, land, exit, if, end, a, +; BBIT: entry, outer, body, land, exit, if, a, + +; RNIT: entry => end, end, +; RNIT: entry, outer, body, land, exit, if, a, diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll new file mode 100644 index 0000000000000..07aa7c3110105 --- /dev/null +++ b/test/Analysis/RegionInfo/loops_2.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @meread_() nounwind { +entry: + br label %bb23 + +bb23: + br label %bb.i + +bb.i: ; preds = %bb.i, %bb54 + br label %pflini_.exit + +pflini_.exit: ; preds = %bb.i + br label %bb58thread-split + +bb58thread-split: ; preds = %bb64, %bb61, %pflini_.exit + br label %bb58 + +bb58: ; preds = %bb60, %bb58thread-split + br i1 1, label %bb59, label %bb23 + +bb59: ; preds = %bb58 + switch i32 1, label %bb60 [ + i32 1, label %l98 + ] + +bb60: ; preds = %bb59 + br i1 1, label %bb61, label %bb58 + +bb61: ; preds = %bb60 + br label %bb58thread-split + +l98: ; preds = %bb69, %bb59 + ret void +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK: [1] bb23 => l98 +; STAT: 2 region - The # of regions +; 
STAT: 1 region - The # of simple regions + +; BBIT: entry, bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, l98, +; BBIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, + +; RNIT: entry, bb23 => l98, l98, +; RNIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll new file mode 100644 index 0000000000000..829c157c2c68d --- /dev/null +++ b/test/Analysis/RegionInfo/mix_1.ll @@ -0,0 +1,69 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + + br i1 1, label %"1", label %"15" +1: + switch i32 0, label %"2" [ i32 0, label %"3" + i32 1, label %"7"] +2: + br label %"4" +3: + br label %"5" +4: + br label %"6" +5: + br label %"6" +6: + br label %"7" +7: + br label %"15" +15: + br label %"8" +8: + br label %"16" +16: + br label %"9" +9: + br i1 1, label %"10", label %"11" +11: + br i1 1, label %"13", label %"12" +13: + br label %"14" +12: + br label %"14" +14: + br label %"8" +10: + br label %"17" +17: + br label %"18" +18: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 15 +; CHECK-NEXT: [2] 1 => 7 +; CHECK-NEXT: [1] 8 => 10 +; CHECK-NEXT: [2] 11 => 14 +; STAT: 5 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 4, 6, 7, 15, 8, 16, 9, 10, 17, 18, 11, 13, 14, 12, 3, 5, +; BBIT: 0, 1, 2, 4, 6, 7, 3, 5, +; BBIT: 1, 2, 4, 6, 3, 5, +; BBIT: 8, 16, 9, 11, 13, 14, 12, +; BBIT: 11, 13, 12, + +; RNIT: 0 => 15, 15, 8 => 10, 10, 17, 18, +; RNIT: 0, 1 => 7, 7, +; RNIT: 1, 2, 4, 6, 3, 5, +; RNIT: 8, 16, 9, 11 => 14, 14, +; RNIT: 11, 13, 12, diff --git 
a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll new file mode 100644 index 0000000000000..7bc0e4607d682 --- /dev/null +++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition_0() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb42 [ + i32 67, label %bb42 + i32 90, label %bb41 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb42, bb41, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, + +define void @normal_condition_1() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb41 [ + i32 67, label %bb42 + i32 90, label %bb42 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb41, bb42, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll new file mode 100644 index 0000000000000..9d8c4558f0493 --- /dev/null +++ b/test/Analysis/RegionInfo/nested_loops.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %exit172, label %end + +exit172: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: 
[1] outer => end + +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, exit172, end, +; BBIT: outer, body, exit172, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, exit172, diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll new file mode 100644 index 0000000000000..d986387099c3e --- /dev/null +++ b/test/Analysis/RegionInfo/next.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @MAIN__() nounwind { +entry: + br label %__label_002001.outer + +__label_002001.outer: ; preds = %bb236, %bb92 + br label %__label_002001 + +__label_002001: ; preds = %bb229, %__label_002001.outer + br i1 1, label %bb93, label %__label_000020 + +bb93: ; preds = %__label_002001 + br i1 1, label %__label_000020, label %bb197 + +bb197: ; preds = %bb193 + br i1 1, label %bb229, label %bb224 + +bb224: ; preds = %bb223, %bb227 + br i1 1, label %bb229, label %bb224 + +bb229: ; preds = %bb227, %bb223 + br i1 1, label %__label_002001, label %__label_002001.outer + +__label_000020: ; preds = %__label_002001, %bb194 + ret void +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] __label_002001.outer => __label_000020 +; CHECK-NEXT: [2] bb197 => bb229 +; CHECK-NEXT: [3] bb224 => bb229 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, __label_002001.outer, __label_002001, bb93, __label_000020, bb197, bb229, bb224, +; BBIT: __label_002001.outer, __label_002001, bb93, bb197, bb229, bb224, +; BBIT: bb197, bb224, +; BBIT: bb224, + +; RNIT: entry, __label_002001.outer => __label_000020, __label_000020, +; RNIT: __label_002001.outer, __label_002001, bb93, bb197 => bb229, bb229, +; RNIT: bb197, 
bb224 => bb229, +; RNIT: bb224, diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll new file mode 100644 index 0000000000000..00b544bc69196 --- /dev/null +++ b/test/Analysis/RegionInfo/paper.ll @@ -0,0 +1,55 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + br label %"1" +1: + br label %"2" +2: + br label %"3" +3: + br i1 1, label %"13", label %"4" +4: + br i1 1, label %"5", label %"1" +5: + br i1 1, label %"8", label %"6" +6: + br i1 1, label %"7", label %"4" +7: + ret void +8: + br i1 1, label %"9", label %"1" +9: + br label %"10" +10: + br i1 1, label %"12", label %"11" +11: + br i1 1, label %"9", label %"8" +13: + br i1 1, label %"2", label %"1" +12: + switch i32 0, label %"1" [ i32 0, label %"9" + i32 1, label %"8"] +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 7 +; CHECK-NEXT: [2] 1 => 4 +; CHECK-NEXT: [2] 8 => 1 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, 7, +; BBIT: 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, +; BBIT: 1, 2, 3, 13, +; BBIT: 8, 9, 10, 12, 11, + +; RNIT: 0, 1 => 7, 7, +; RNIT: 1 => 4, 4, 5, 8 => 1, 6, +; RNIT: 1, 2, 3, 13, +; RNIT: 8, 9, 10, 12, 11, diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll new file mode 100644 index 0000000000000..a97182b81a248 --- /dev/null +++ b/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& 
FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %else, label %true77 + +true77: + br i1 1, label %then83, label %else + +then83: + br label %outer + +else: + br label %else106 + +else106: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; CHECK-NEXT: [2] outer => else + +; STAT: 3 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, else, else106, end, true77, then83, +; BBIT: outer, body, else, else106, true77, then83, +; BBIT: outer, body, true77, then83, + +; RNIT: entry, outer => end, end, +; RNIT: outer => else, else, else106, +; RNIT: outer, body, true77, then83, diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index 0bc9ce8241a88..89e8b983c0c0f 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -indvars -S > %t ; RUN: grep select %t | count 2 -; RUN: grep {icmp ne i32.\* %w } %t +; RUN: grep {icmp ne i32.\* } %t ; Indvars should be able to insert a canonical induction variable ; for the bb6 loop without using a maximum calculation (icmp, select) diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index a8966be4ccd46..843fb073087c6 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,8 +1,9 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>} +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; ScalarEvolution should be able to understand the loop and eliminate the casts. 
+; CHECK: {%d,+,sizeof(i32)} + define void @foo(i32* nocapture %d, i32 %n) nounwind { entry: %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] @@ -32,3 +33,40 @@ bb1.return_crit_edge: ; preds = %bb1 return: ; preds = %bb1.return_crit_edge, %entry ret void } + +; ScalarEvolution should be able to find the maximum tripcount +; of this multiple-exit loop, and if it doesn't know the exact +; count, it should say so. + +; PR7845 +; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: max backedge-taken count is 5 + +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; <i32> [#uses=5] + %cmp = icmp slt i32 %g_4.0, 5 ; <i1> [#uses=1] + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = trunc i32 %g_4.0 to i16 ; <i16> [#uses=1] + %tobool.not = icmp eq i16 %conv, 0 ; <i1> [#uses=1] + %tobool3 = icmp ne i32 %g_4.0, 0 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.not, %tobool3 ; <i1> [#uses=1] + br i1 %or.cond, label %for.end, label %for.inc + +for.inc: ; preds = %for.body + %add = add nsw i32 %g_4.0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.body, %for.cond + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/Archive/README.txt b/test/Archive/README.txt index da6cfa4c9ed00..6810befc5857c 100644 --- a/test/Archive/README.txt +++ b/test/Archive/README.txt @@ -5,7 +5,7 @@ This directory contains various tests of llvm-ar and llvm-ranlib to ensure compatibility reading other ar(1) formats. It also provides a basic functionality test for these tools. 
-There are four archives stored in CVS with these tests: +There are four archives accompanying these tests: GNU.a - constructed on Linux with GNU ar MacOSX.a - constructed on Mac OS X with its native BSD4.4 ar diff --git a/test/Assembler/2010-01-06-UnionType.ll b/test/Assembler/2010-01-06-UnionType.ll deleted file mode 100644 index 37130d66088d0..0000000000000 --- a/test/Assembler/2010-01-06-UnionType.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llvm-as %s -o /dev/null - -%X = type union { i32, i32* } diff --git a/test/Assembler/align-inst-alloca.ll b/test/Assembler/align-inst-alloca.ll new file mode 100644 index 0000000000000..0343bebf18764 --- /dev/null +++ b/test/Assembler/align-inst-alloca.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + %p = alloca i1, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-load.ll b/test/Assembler/align-inst-load.ll new file mode 100644 index 0000000000000..3586be2d6e03b --- /dev/null +++ b/test/Assembler/align-inst-load.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + load i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-store.ll b/test/Assembler/align-inst-store.ll new file mode 100644 index 0000000000000..8c3b7124b4376 --- /dev/null +++ b/test/Assembler/align-inst-store.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + store i1 false, i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst.ll b/test/Assembler/align-inst.ll new file mode 100644 index 0000000000000..6f7100e065d33 --- /dev/null +++ b/test/Assembler/align-inst.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as %s -o /dev/null + +@A = global i1 0, align 536870912 + +define void @foo() { + %p = alloca i1, align 536870912 + load i1* %p, align 536870912 + store i1 false, i1* %p, align 536870912 + ret void +} diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll new file mode 
100644 index 0000000000000..fe23d26fbeb4c --- /dev/null +++ b/test/Assembler/comment.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as < %s | llvm-dis -show-annotations | FileCheck -check-prefix=ANNOT %s +; RUN: llvm-as < %s | llvm-dis | FileCheck -check-prefix=BARE %s + +; The bare version of this file should not have any #uses lines. +; BARE: @B = +; BARE-NOT: #uses +; BARE: } + +@B = external global i32 +; ANNOT: @B = external global i32 ; [#uses=0] + +define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + ret <4 x i1> %cmp +} + +; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + + diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll index 803d6d3430631..ebef58ff459cc 100644 --- a/test/Assembler/getelementptr.ll +++ b/test/Assembler/getelementptr.ll @@ -3,9 +3,9 @@ ; Verify that over-indexed getelementptrs are folded. @A = external global [2 x [3 x [5 x [7 x i32]]]] @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523) -; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; <i32**> [#uses=0] +; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523) -; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; <i32**> [#uses=0] +; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ;; Verify that i16 indices work. 
@x = external global {i32, i32} diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml index e830106c11ffd..bf2178254409e 100644 --- a/test/Bindings/Ocaml/analysis.ml +++ b/test/Bindings/Ocaml/analysis.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml index 112ca618301a7..30b07d2199f3a 100644 --- a/test/Bindings/Ocaml/bitreader.ml +++ b/test/Bindings/Ocaml/bitreader.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml index ef1c9ab722c86..8eb923ea32c73 100644 --- a/test/Bindings/Ocaml/bitwriter.ml +++ b/test/Bindings/Ocaml/bitwriter.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml index 2caeb82aac22c..63040e4a33fc0 100644 --- a/test/Bindings/Ocaml/executionengine.ml +++ b/test/Bindings/Ocaml/executionengine.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml index f28eff28da759..8a6af012ac249 100644 --- a/test/Bindings/Ocaml/scalar_opts.ml +++ b/test/Bindings/Ocaml/scalar_opts.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -13,8 +14,11 @@ let context = global_context () let void_type = 
Llvm.void_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml index 3c3b7339fef87..bfaf37ca879df 100644 --- a/test/Bindings/Ocaml/target.ml +++ b/test/Bindings/Ocaml/target.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -8,13 +9,17 @@ open Llvm open Llvm_target + let context = global_context () let i32_type = Llvm.i32_type context let i64_type = Llvm.i64_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index 506bf50e2a490..e55ab9643e43c 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc > %t.ll *) @@ -296,12 +296,6 @@ let test_constants () = insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |]) = (type_of c)); - group "union"; - let t = union_type context [| i1_type; i16_type; i64_type; double_type |] in - let c = const_union t one in - ignore (define_global "const_union" c m); - insist (t = (type_of c)); - (* RUN: grep {const_null.*zeroinit} < %t.ll *) group "null"; @@ -436,7 +430,7 @@ let test_constants () = * RUN: grep {const_select.*select} < %t.ll * RUN: grep {const_extractelement.*extractelement} < %t.ll * RUN: grep {const_insertelement.*insertelement} < %t.ll - * RUN: grep 
{const_shufflevector.*shufflevector} < %t.ll + * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll *) ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m); ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m); @@ -455,7 +449,8 @@ let test_constants () = ignore (define_global "const_shufflevector" (const_shufflevector (const_vector [| zero; one |]) (const_vector [| one; zero |]) - (const_bitcast foldbomb (vector_type i32_type 2))) m); + (const_vector [| const_int i32_type 0; const_int i32_type 1; + const_int i32_type 2; const_int i32_type 3 |])) m); group "asm"; begin let ft = function_type void_type [| i32_type; i32_type; i32_type |] in @@ -642,11 +637,18 @@ let test_users () = let p1 = param fn 0 in let p2 = param fn 1 in + let a3 = build_alloca i32_type "user_alloca" b in + let p3 = build_load a3 "user_load" b in let i = build_add p1 p2 "sum" b in + insist ((num_operands i) = 2); insist ((operand i 0) = p1); insist ((operand i 1) = p2); + set_operand i 1 p3; + insist ((operand i 1) != p2); + insist ((operand i 1) = p3); + ignore (build_unreachable b) @@ -1154,13 +1156,13 @@ let test_builder () = group "comparisons"; begin (* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll * RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll - * RUN: grep {%build_icmp_false = fcmp false float %F1, %F2} < %t.ll - * RUN: grep {%build_icmp_true = fcmp true float %F2, %F1} < %t.ll + * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll + * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); ignore (build_icmp Icmp.Sle p2 p1 "build_icmp_sle" atentry); - ignore (build_fcmp Fcmp.False f1 f2 "build_icmp_false" atentry); - ignore (build_fcmp Fcmp.True f2 f1 "build_icmp_true" atentry) + ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry); + ignore (build_fcmp Fcmp.True f2 f1 "build_fcmp_true" 
atentry) end; group "miscellaneous"; begin @@ -1229,13 +1231,19 @@ let test_builder () = group "dbg"; begin (* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll - * RUN: grep {!1 = metadata !\{i32 2, metadata !"dbg test"\}} < %t.ll + * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll *) - let m1 = const_int i32_type 2 in - let m2 = mdstring context "dbg test" in - let md = mdnode context [| m1; m2 |] in + insist ((current_debug_location atentry) = None); + + let m_line = const_int i32_type 2 in + let m_col = const_int i32_type 3 in + let m_scope = mdnode context [| |] in + let m_inlined = mdnode context [| |] in + let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in set_current_debug_location atentry md; + insist ((current_debug_location atentry) = Some md); + let i = build_add p1 p2 "dbg" atentry in insist ((has_metadata i) = true); diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll new file mode 100644 index 0000000000000..8a8767337dca7 --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll @@ -0,0 +1,3 @@ +; This isn't really an assembly file. It just runs test on bitcode to ensure +; it is auto-upgraded. 
+; RUN: llvm-dis < %s.bc | not grep {i32 @\\.llvm\\.eh} diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc Binary files differnew file mode 100644 index 0000000000000..1abe9688e291c --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll.bc diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll new file mode 100644 index 0000000000000..272cd424e2a20 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll @@ -0,0 +1,213 @@ +; RUN: llvm-dis < %s.bc | FileCheck %s + +; vmovls should be auto-upgraded to sext + +; CHECK: vmovls8 +; CHECK-NOT: arm.neon.vmovls.v8i16 +; CHECK: sext <8 x i8> + +; CHECK: vmovls16 +; CHECK-NOT: arm.neon.vmovls.v4i32 +; CHECK: sext <4 x i16> + +; CHECK: vmovls32 +; CHECK-NOT: arm.neon.vmovls.v2i64 +; CHECK: sext <2 x i32> + +; vmovlu should be auto-upgraded to zext + +; CHECK: vmovlu8 +; CHECK-NOT: arm.neon.vmovlu.v8i16 +; CHECK: zext <8 x i8> + +; CHECK: vmovlu16 +; CHECK-NOT: arm.neon.vmovlu.v4i32 +; CHECK: zext <4 x i16> + +; CHECK: vmovlu32 +; CHECK-NOT: arm.neon.vmovlu.v2i64 +; CHECK: zext <2 x i32> + +; vaddl/vaddw should be auto-upgraded to add with sext/zext + +; CHECK: vaddls16 +; CHECK-NOT: arm.neon.vaddls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vaddlu32 +; CHECK-NOT: arm.neon.vaddlu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vaddws8 +; CHECK-NOT: arm.neon.vaddws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: add <8 x i16> + +; CHECK: vaddwu16 +; CHECK-NOT: arm.neon.vaddwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; vsubl/vsubw should be auto-upgraded to subtract with sext/zext + +; CHECK: vsubls16 +; CHECK-NOT: arm.neon.vsubls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vsublu32 +; CHECK-NOT: arm.neon.vsublu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; 
CHECK-NEXT: sub <2 x i64> + +; CHECK: vsubws8 +; CHECK-NOT: arm.neon.vsubws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sub <8 x i16> + +; CHECK: vsubwu16 +; CHECK-NOT: arm.neon.vsubwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; vmull should be auto-upgraded to multiply with sext/zext +; (but vmullp should remain an intrinsic) + +; CHECK: vmulls8 +; CHECK-NOT: arm.neon.vmulls.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sext <8 x i8> +; CHECK-NEXT: mul <8 x i16> + +; CHECK: vmullu16 +; CHECK-NOT: arm.neon.vmullu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: mul <4 x i32> + +; CHECK: vmullp8 +; CHECK: arm.neon.vmullp.v8i16 + +; vmlal should be auto-upgraded to multiply/add with sext/zext + +; CHECK: vmlals32 +; CHECK-NOT: arm.neon.vmlals.v2i64 +; CHECK: sext <2 x i32> +; CHECK-NEXT: sext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vmlalu8 +; CHECK-NOT: arm.neon.vmlalu.v8i16 +; CHECK: zext <8 x i8> +; CHECK-NEXT: zext <8 x i8> +; CHECK-NEXT: mul <8 x i16> +; CHECK-NEXT: add <8 x i16> + +; vmlsl should be auto-upgraded to multiply/sub with sext/zext + +; CHECK: vmlsls16 +; CHECK-NOT: arm.neon.vmlsls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: mul <4 x i32> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vmlslu32 +; CHECK-NOT: arm.neon.vmlslu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: sub <2 x i64> + +; vaba should be auto-upgraded to vabd + add + +; CHECK: vabas32 +; CHECK-NOT: arm.neon.vabas.v2i32 +; CHECK: arm.neon.vabds.v2i32 +; CHECK-NEXT: add <2 x i32> + +; CHECK: vabaQu8 +; CHECK-NOT: arm.neon.vabau.v16i8 +; CHECK: arm.neon.vabdu.v16i8 +; CHECK-NEXT: add <16 x i8> + +; vabal should be auto-upgraded to vabd with zext + add + +; CHECK: vabals16 +; CHECK-NOT: arm.neon.vabals.v4i32 +; CHECK: arm.neon.vabds.v4i16 +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vabalu32 +; 
CHECK-NOT: arm.neon.vabalu.v2i64 +; CHECK: arm.neon.vabdu.v2i32 +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; vabdl should be auto-upgraded to vabd with zext + +; CHECK: vabdls8 +; CHECK-NOT: arm.neon.vabdls.v8i16 +; CHECK: arm.neon.vabds.v8i8 +; CHECK-NEXT: zext <8 x i8> + +; CHECK: vabdlu16 +; CHECK-NOT: arm.neon.vabdlu.v4i32 +; CHECK: arm.neon.vabdu.v4i16 +; CHECK-NEXT: zext <4 x i16> + +; vmovn should be auto-upgraded to trunc + +; CHECK: vmovni16 +; CHECK-NOT: arm.neon.vmovn.v8i8 +; CHECK: trunc <8 x i16> + +; CHECK: vmovni32 +; CHECK-NOT: arm.neon.vmovn.v4i16 +; CHECK: trunc <4 x i32> + +; CHECK: vmovni64 +; CHECK-NOT: arm.neon.vmovn.v2i32 +; CHECK: trunc <2 x i64> + +; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1) + +; CHECK: vld1i8 +; CHECK: i32 1 +; CHECK: vld2Qi16 +; CHECK: i32 1 +; CHECK: vld3i32 +; CHECK: i32 1 +; CHECK: vld4Qf +; CHECK: i32 1 + +; CHECK: vst1i8 +; CHECK: i32 1 +; CHECK: vst2Qi16 +; CHECK: i32 1 +; CHECK: vst3i32 +; CHECK: i32 1 +; CHECK: vst4Qf +; CHECK: i32 1 + +; CHECK: vld2laneQi16 +; CHECK: i32 1 +; CHECK: vld3lanei32 +; CHECK: i32 1 +; CHECK: vld4laneQf +; CHECK: i32 1 + +; CHECK: vst2laneQi16 +; CHECK: i32 1 +; CHECK: vst3lanei32 +; CHECK: i32 1 +; CHECK: vst4laneQf +; CHECK: i32 1 diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc Binary files differnew file mode 100644 index 0000000000000..cabc3c9341362 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll.bc diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index 6ad09d2e25cdf..18a31eb45d36d 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -1,6 +1,8 @@ ; Test that bugpoint can narrow down the testcase to the important function +; FIXME: This likely fails on windows ; -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: bugpoint -load 
%llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; XFAIL: mingw define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll new file mode 100644 index 0000000000000..f2541ee3f9ac6 --- /dev/null +++ b/test/BugPoint/metadata.ll @@ -0,0 +1,35 @@ +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw + +; Bugpoint should keep the call's metadata attached to the call. + +; CHECK: call void @foo(), !dbg !0, !attach !2 +; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1} +; CHECK: !1 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +; CHECK: !2 = metadata !{metadata !"the call to foo"} + +%rust_task = type {} +define void @test(i32* %a, i8* %b) { + %s = mul i8 22, 9, !attach !0, !dbg !10 + store i8 %s, i8* %b, !attach !1, !dbg !11 + call void @foo(), !attach !2, !dbg !12 + store i32 7, i32* %a, !attach !3, !dbg !13 + %t = add i32 0, 5, !attach !4, !dbg !14 + ret void +} + +declare void @foo() + +!0 = metadata !{metadata !"boring"} +!1 = metadata !{metadata !"uninteresting"} +!2 = metadata !{metadata !"the call to foo"} +!3 = metadata !{metadata !"noise"} +!4 = metadata !{metadata !"filler"} + +!9 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +!10 = metadata !{i32 100, i32 101, metadata !9, metadata !9} +!11 = metadata !{i32 102, i32 103, metadata !9, metadata !9} +!12 = metadata !{i32 104, i32 105, metadata !9, metadata !9} +!13 = metadata !{i32 106, i32 107, metadata !9, metadata !9} +!14 = metadata !{i32 108, i32 109, metadata !9, metadata !9} diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 
439ea545468e0..791ec69a23d2f 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,5 +1,7 @@ -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes +; FIXME: This likely fails on windows +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw ; Test to make sure that arguments are removed from the function if they are ; unnecessary. And clean up any types that that frees up too. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 433af900dd2b4..ad9a2432dbfa8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,6 +24,23 @@ endif() include(FindPythonInterp) if(PYTHONINTERP_FOUND) + get_directory_property(DEFINITIONS COMPILE_DEFINITIONS) + foreach(DEF ${DEFINITIONS}) + set(DEFS "${DEFS} -D${DEF}") + endforeach() + get_directory_property(INC_DIRS INCLUDE_DIRECTORIES) + foreach(INC_DIR ${INC_DIRS}) + set(IDIRS "${IDIRS} -I${INC_DIR}") + endforeach() + string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + string(REPLACE "<DEFINES>" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "<FLAGS>" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}") + if(NOT MSVC) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++") + endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in ${CMAKE_CURRENT_BINARY_DIR}/site.exp) diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 81483cb4e7c55..ee63656b26d39 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ 
b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] +; CHECK: dct_luma_sp: define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { entry: +; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK-NOT: sub sp +; CHECK: vstmdb sp! %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] br label %cond_next489 diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll index d741112e2886e..76fa3649c8806 100644 --- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll @@ -2,7 +2,7 @@ ; PR1266 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] } %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll index 030486a7c9832..7ba2a190be734 100644 --- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll @@ -2,7 +2,7 @@ ; PR1424 target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 } diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll index 198faebbea6f1..f89a5de77b3f5 100644 --- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll +++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll @@ -17,3 +17,17 @@ entry: store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16 ret void } + +; Radar 8290937: Ignore undef shuffle indices. +; CHECK: t2 +; CHECK: vtrn.16 +define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %b.0) nounwind { +entry: + %0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> + %1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0 + store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16 + %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0 + store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16 + ret void +} diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index ff60fa8c49d85..e47c038393754 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -5,32 +5,32 @@ %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1b = call %struct.__neon_int8x8x3_t 
@llvm.arm.neon.vld3.v8i8(i8* %A2) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] - %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] - %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] - %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) 
; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd) + call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] @@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) ret <8 x i8> %tmp4 } diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index ce959d1b91c8f..cd1c9c8c04214 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -36,8 +36,8 @@ entry: %tmp5 = insertelement <4 x float> %tmp7, 
float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19) + tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index e4f20990bed2d..6f487962310fe 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -12,8 +12,8 @@ entry: %tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1] %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i) nounwind + tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index 7650d883d7b1c..ac8e80904edae 100755 --- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s ; PR 7433 +; XFAIL: * %0 = type { i8*, i8* } %1 = type { i8*, i8*, i8* } diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 0c5b180cf8464..ffc47ebdf1969 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -16,10 +16,10 @@ target triple = "thumbv7-apple-darwin10" define i32 @test(i8* %arg) nounwind { 
entry: - %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg) + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2> store <2 x i64> %1, <2 x i64>* undef, align 16 ret i32 undef } -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll new file mode 100644 index 0000000000000..c03c815459465 --- /dev/null +++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll @@ -0,0 +1,95 @@ +; RUN: llc -enable-correct-eh-support < %s +; PR7716 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +%0 = type { i8*, i8* } +%struct.A = type { i32 } + +@d = internal global i32 0, align 4 ; <i32*> [#uses=6] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1] +@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1] +@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1] +@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1] +@.str5 = private constant [8 x i8] c"A() %d\0A\00" ; <[8 x i8]*> [#uses=1] +@str = internal constant [14 x i8] c"Throwing 1...\00" ; <[14 x i8]*> [#uses=1] +@str1 = internal constant [8 x i8] c"Caught.\00" ; <[8 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare i8* @__cxa_allocate_exception(i32) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare void @_Unwind_SjLj_Resume(i8*) + +define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 { +entry: + %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1] + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0] + %tmp3.i = load i32* @d ; <i32> [#uses=1] + %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1] + store i32 %inc.i, i32* @d + ret void +} + +declare void @__cxa_throw(i8*, i8*, i8*) + +define i32 @main() ssp { +entry: + %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0] + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2] + %tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp2.i.i.i + %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0] + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn + to label %.noexc unwind label %lpad + +.noexc: ; preds = %entry + unreachable + +try.cont: ; preds = %lpad + %0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0] + %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0] + %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0] + 
%call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %tmp3.i.i = load i32* @d ; <i32> [#uses=1] + %inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1] + store i32 %inc.i.i4, i32* @d + tail call void @__cxa_end_catch() + %tmp13 = load i32* @d ; <i32> [#uses=1] + %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0] + %tmp16 = load i32* @d ; <i32> [#uses=1] + %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1] + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv + +lpad: ; preds = %entry + %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4] + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1] + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1] + %3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1] + br i1 %3, label %try.cont, label %eh.resume + +eh.resume: ; preds = %lpad + tail call void @_Unwind_SjLj_Resume(i8* %exn) noreturn + unreachable +} + +declare i8* @__cxa_get_exception_ptr(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll new file mode 100644 index 0000000000000..f57b7e6769494 --- /dev/null +++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 +; <rdar://problem/8264008> + +define linkonce_odr arm_apcscc void @func1() { +entry: + %save_filt.936 = alloca i32 ; <i32*> [#uses=2] + %save_eptr.935 = alloca i8* ; <i8**> [#uses=2] + %eh_exception = alloca i8* ; <i8**> [#uses=5] + %eh_selector = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = 
bitcast i32 0 to i32 ; <i32> [#uses=0] + call arm_apcscc void @func2() + br label %return + +bb: ; No predecessors! + %eh_select = load i32* %eh_selector ; <i32> [#uses=1] + store i32 %eh_select, i32* %save_filt.936, align 4 + %eh_value = load i8** %eh_exception ; <i8*> [#uses=1] + store i8* %eh_value, i8** %save_eptr.935, align 4 + invoke arm_apcscc void @func3() + to label %invcont unwind label %lpad + +invcont: ; preds = %bb + %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] + store i8* %tmp6, i8** %eh_exception, align 4 + %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1] + store i32 %tmp7, i32* %eh_selector, align 4 + br label %Unwind + +bb12: ; preds = %ppad + call arm_apcscc void @_ZSt9terminatev() noreturn nounwind + unreachable + +return: ; preds = %entry + ret void + +lpad: ; preds = %bb + %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1) + store i32 %eh_select14, i32* %eh_selector + br label %ppad + +ppad: + br label %bb12 + +Unwind: + %eh_ptr15 = load i8** %eh_exception + call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15) + unreachable +} + +declare arm_apcscc void @func2() + +declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) + +declare arm_apcscc void @func3() + +declare arm_apcscc i32 @__gxx_personality_sj0(...) 
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index cc718399ea96e..bb7853e66ef46 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -1,11 +1,43 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN -define i32 @f(i32 %a, i64 %b) { +define i32 @f1(i32 %a, i64 %b) { +; ELF: f1: ; ELF: mov r0, r2 +; DARWIN: f1: ; DARWIN: mov r0, r1 - %tmp = call i32 @g(i64 %b) + %tmp = call i32 @g1(i64 %b) ret i32 %tmp } -declare i32 @g(i64) +; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi. +define i32 @f2() nounwind optsize { +; ELF: f2: +; ELF: mov r0, #128 +; ELF: str r0, [sp] +; DARWIN: f2: +; DARWIN: mov r3, #128 +entry: + %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1] + %not. = icmp ne i32 %0, 128 ; <i1> [#uses=1] + %.0 = zext i1 %not. to i32 ; <i32> [#uses=1] + ret i32 %.0 +} + +; test that on gnueabi a 64 bit value at this position will cause r3 to go +; unused and the value stored in [sp] +; ELF: f3: +; ELF: ldr r0, [sp] +; ELF-NEXT: mov pc, lr +; DARWIN: f3: +; DARWIN: mov r0, r3 +; DARWIN-NEXT: mov pc, lr +define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) { +entry: + %0 = trunc i64 %l to i32 + ret i32 %0 +} + +declare i32 @g1(i64) + +declare i32 @g2(i32 %i, ...) 
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll new file mode 100644 index 0000000000000..59e2b43a9172b --- /dev/null +++ b/test/CodeGen/ARM/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: mov r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: mov r1, r1, lsr #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: mov r2, r0, lsr #7 +; CHECK: mov r0, r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f1269d5bd2be6..db5afe3f56cb8 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\ ; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll new file mode 100644 index 0000000000000..25c556889fc44 --- /dev/null +++ 
b/test/CodeGen/ARM/code-placement.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; PHI elimination shouldn't break backedge. +; rdar://8263994 + +%struct.list_data_s = type { i16, i16 } +%struct.list_head = type { %struct.list_head*, %struct.list_data_s* } + +define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind { +entry: + %0 = icmp eq %struct.list_head* %list, null + br i1 %0, label %bb2, label %bb + +bb: +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 +; CHECK: bx lr + %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] + %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] + %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0 + %2 = load %struct.list_head** %1, align 4 + store %struct.list_head* %next.04, %struct.list_head** %1, align 4 + %3 = icmp eq %struct.list_head* %2, null + br i1 %3, label %bb2, label %bb + +bb2: + %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ] + ret %struct.list_head* %next.0.lcssa +} diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index d833afa55583a..448b437ddf46e 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,13 +1,9 @@ ; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM -; RUN: llc < %s -march=arm -mcpu=cortex-m3 \ -; RUN: | FileCheck %s -check-prefix=CHECK-ARMV7M define i32 @f1(i32 %a, i32 %b) { entry: ; CHECK-ARM: f1 ; CHECK-ARM: __divsi3 -; CHECK-ARMV7M: f1 -; CHECK-ARMV7M: sdiv %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -16,8 +12,6 @@ define i32 @f2(i32 %a, i32 %b) { entry: ; CHECK-ARM: f2 ; CHECK-ARM: __udivsi3 -; CHECK-ARMV7M: f2 -; CHECK-ARMV7M: udiv %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -26,8 +20,6 @@ define i32 @f3(i32 %a, i32 %b) { entry: ; CHECK-ARM: f3 ; CHECK-ARM: __modsi3 -; CHECK-ARMV7M: f3 -; CHECK-ARMV7M: sdiv %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] ret i32 
%tmp1 } @@ -36,8 +28,6 @@ define i32 @f4(i32 %a, i32 %b) { entry: ; CHECK-ARM: f4 ; CHECK-ARM: __umodsi3 -; CHECK-ARMV7M: f4 -; CHECK-ARMV7M: udiv %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll new file mode 100644 index 0000000000000..3bee84d84de4c --- /dev/null +++ b/test/CodeGen/ARM/fast-isel.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin + +; Very basic fast-isel functionality. + +define i32 @add(i32 %a, i32 %b) nounwind ssp { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr + store i32 %b, i32* %b.addr + %tmp = load i32* %a.addr + %tmp1 = load i32* %b.addr + %add = add nsw i32 %tmp, %tmp1 + ret i32 %add +} + +define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + br label %fast + +fast: + %t0 = add i32 %r, %s + %t1 = mul i32 %t0, %s + %t2 = sub i32 %t1, %s + %t3 = and i32 %t2, %s + %t4 = xor i32 %t3, 3 + %t5 = xor i32 %t4, %s + %t6 = add i32 %t5, 2 + %t7 = getelementptr i32* %y, i32 1 + %t8 = getelementptr i32* %t7, i32 %t6 + br label %exit + +exit: + ret i32* %t8 +} diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll index efd87d2dcb896..3223885feda90 100644 --- a/test/CodeGen/ARM/fnmuls.ll +++ b/test/CodeGen/ARM/fnmuls.ll @@ -1,20 +1,18 @@ -; XFAIL: * ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -define float @test1(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 
+define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fsub float -0.0, %0 ret float %1 } -define float @test2(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +define arm_aapcs_vfpcc float @test2(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fmul float -1.0, %0 diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 6875288304be2..64350591b87f1 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s ; rdar://7461510 diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll index 7f9d62a9e9453..561463720c808 100644 --- a/test/CodeGen/ARM/fpowi.ll +++ b/test/CodeGen/ARM/fpowi.ll @@ -3,7 +3,7 @@ ; ModuleID = '<stdin>' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" define double @_ZSt3powdi(double %__x, i32 %__i) { entry: diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 688b7bc312c7b..1ec4d15f66723 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,8 +24,7 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, asr r2 %a = ashr i64 %x, %y 
@@ -37,8 +36,7 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, lsr r2 %a = lshr i64 %x, %y diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 25cf1356d61c3..866be423c2cb1 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s0, [r9, #-128] -; CHECK: vstr.32 s0, [r9, #-96] -; CHECK: vstr.32 s0, [r9, #-64] -; CHECK: vstr.32 s0, [r9, #-32] -; CHECK: vstr.32 s0, [r9] -; CHECK: vstr.32 s0, [r9, #32] -; CHECK: vstr.32 s0, [r9, #64] -; CHECK: vstr.32 s0, [r9, #96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" @@ -628,8 +628,7 @@ bb24: ; preds = %bb23 ; CHECK: @ %bb24 ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1 -; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0 +; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1 ; CHECK-NEXT: bne.w %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1] diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll index 1e2e7aa0c8ff1..4905dc28cf488 100644 --- a/test/CodeGen/ARM/pack.ll +++ b/test/CodeGen/ARM/pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=arm 
-mattr=+v6 | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ 
-64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 89b657797f2a3..2e4f10d8a63de 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -23,21 +23,21 @@ entry: %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2] %7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1] %8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1] - %9 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %8) ; <<4 x i32>> [#uses=1] + %9 = sext <4 x i16> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] %10 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1] %11 = bitcast double %10 to <4 x i16> ; <<4 x i16>> [#uses=1] - %12 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %11) ; <<4 x i32>> [#uses=1] + %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1] %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1] %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1] %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 
-12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1] %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1) ret void } @@ -45,10 +45,10 @@ define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, entry: ; CHECK: t2: ; CHECK: vld1.16 -; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vld1.16 ; CHECK: vmul.i16 +; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vst1.16 ; CHECK: vst1.16 @@ -57,17 +57,17 @@ entry: %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1] %7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1] - %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7) ; <<8 x i16>> [#uses=1] + %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1] %9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1] %10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1] %11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1) %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1] %13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10) + tail call void 
@llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1) ret void } @@ -77,14 +77,14 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind { ; CHECK: vmul.i8 ; CHECK-NOT: vmov ; CHECK: vst3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1] %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1] %tmp5 = sub <8 x i8> %tmp3, %tmp4 %tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> %tmp4, %tmp2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1) ret <8 x i8> %tmp4 } @@ -97,10 +97,10 @@ entry: ; CHECK-NOT: vmov ; CHECK: bne %tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1] - %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1] %tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1] - %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1] br i1 undef, label %return1, label %return2 @@ -116,7 +116,7 @@ return1: %tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x 
i32>> [#uses=1] %tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1) ret void return2: @@ -128,7 +128,7 @@ return2: %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1) call void @llvm.trap() unreachable } @@ -143,7 +143,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1] %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2] - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1] %tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1] @@ -156,7 +156,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: vmov d1, d0 ; CHECK-NEXT: vld2.8 {d0[1], d1[1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; 
<%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] %tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1] @@ -174,14 +174,14 @@ entry: ; CHECK: vuzp.32 q0, q1 ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2] - %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1] %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1] %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60) - %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1) + %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1] %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4) + tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1) ret void } @@ -304,44 +304,43 @@ bb14: ; preds = %bb6 ; This test crashes the coalescer because live variables were not updated properly. 
define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1) %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1) ret <8 x i8> undef } -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly - -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x 
i32>) nounwind readnone -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +nounwind -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 1e780e6a9097d..6b86f1a9f368d 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization" +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization" define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, 
double %d2) nounwind { entry: diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 29c55c6bd9752..7413bed5c5b17 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON define i32 @f1(i32 %a.s) { ;CHECK: f1: @@ -65,3 +66,27 @@ define double @f7(double %a, double %b) { %tmp1 = select i1 %tmp, double -1.000e+00, double %b ret double %tmp1 } + +; <rdar://problem/7260094> +; +; We used to generate really horrible code for this function. The main cause was +; a lack of a custom lowering routine for an ISD::SELECT. This would result in +; two "it" blocks in the code: one for the "icmp" and another to move the index +; into the constant pool based on the value of the "icmp". If we have one "it" +; block generated, odds are good that we have close to the ideal code for this: +; +; CHECK-NEON: _f8: +; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123 +; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0 +; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]] +; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI +; CHECK-NEON-NEXT: it eq +; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4 +; CHECK-NEON-NEXT: ldr +; CHECK-NEON: bx + +define arm_apcscc float @f8(i32 %a) nounwind { + %tmp = icmp eq i32 %a, 1123 + %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 + ret float %tmp1 +} diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index 792ef79982b79..ae1ba2f738252 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> 
@llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll index 848a4dfed0542..8b4145914e7ca 100644 --- a/test/CodeGen/ARM/t2-imm.ll +++ b/test/CodeGen/ARM/t2-imm.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f6(i32 %a) { ; CHECK:f6 -; CHECK: movw r0, #:lower16:65537123 -; CHECK: movt r0, #:upper16:65537123 +; CHECK: movw r0, #1123 +; CHECK: movt r0, #1000 %tmp = add i32 0, 65537123 ret i32 %tmp } diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll index e2dca4647bce6..4fe1c434799d2 100644 --- a/test/CodeGen/ARM/vaba.ll +++ b/test/CodeGen/ARM/vaba.ll @@ -6,8 +6,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = 
call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -16,8 +17,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -26,8 +28,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -36,8 +39,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -46,8 +50,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> 
@llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -56,8 +61,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -66,8 +72,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -76,8 +83,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -86,8 +94,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x 
i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -96,8 +105,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -106,8 +116,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -116,25 +127,26 @@ define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } 
-declare <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind 
readnone +declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: vabals8: @@ -142,8 +154,10 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -152,8 +166,10 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -162,8 +178,10 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* 
%C) nounwind { @@ -172,8 +190,10 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -182,8 +202,10 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -192,14 +214,8 @@ define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } - -declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> 
@llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll index 2b4539361459b..9ec734fa7641e 100644 --- a/test/CodeGen/ARM/vabd.ll +++ b/test/CodeGen/ARM/vabd.ll @@ -151,8 +151,9 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vabdl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -160,8 +161,9 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -169,8 +171,9 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -178,8 +181,9 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: 
vabdl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -187,8 +191,9 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -196,14 +201,7 @@ define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index 9bb8bf5610453..a830e968ff788 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ 
-157,8 +157,10 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) 
nounwind { ;CHECK: vaddl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddws8: ;CHECK: vaddw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 
x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind 
readnone -declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index c11a67c6c4341..e460a84f6265f 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp3 } +; Undef shuffle indices should not prevent matching to VEXT: + +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd_undef: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq_undef: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6> + ret <16 x i8> %tmp3 +} + diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index c61ea8c9a7892..2488e8a0d0ccc 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -3,7 +3,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: ;CHECK: vld1.8 - %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A) + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1) ret <8 x i8> %tmp1 } @@ -11,7 +11,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { ;CHECK: 
vld1i16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0) + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) ret <4 x i16> %tmp1 } @@ -19,7 +19,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { ;CHECK: vld1i32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0) + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) ret <2 x i32> %tmp1 } @@ -27,7 +27,7 @@ define <2 x float> @vld1f(float* %A) nounwind { ;CHECK: vld1f: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0) + %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1) ret <2 x float> %tmp1 } @@ -35,14 +35,14 @@ define <1 x i64> @vld1i64(i64* %A) nounwind { ;CHECK: vld1i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0) + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) ret <1 x i64> %tmp1 } define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;CHECK: vld1Qi8: ;CHECK: vld1.8 - %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A) + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1) ret <16 x i8> %tmp1 } @@ -50,7 +50,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind { ;CHECK: vld1Qi16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0) + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1) ret <8 x i16> %tmp1 } @@ -58,7 +58,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind { ;CHECK: vld1Qi32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0) + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) ret <4 x i32> %tmp1 } @@ -66,7 +66,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind { ;CHECK: vld1Qf: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call 
<4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0) + %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1) ret <4 x float> %tmp1 } @@ -74,18 +74,31 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind { ;CHECK: vld1Qi64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0) + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) ret <2 x i64> %tmp1 } -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +; Radar 8355607 +; Do not crash if the vld1 result is not used. 
+define void @unused_vld1_result() { +entry: +;CHECK: unused_vld1_result +;CHECK: vld1.32 + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index 0838636ce7421..811f6e6db96f2 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind { ;CHECK: vld2i32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld2f(float* %A) nounwind { ;CHECK: vld2f: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) %tmp2 = 
extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { ;CHECK: vld2i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -68,7 +68,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -79,7 +79,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind { ;CHECK: vld2Qi16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -90,7 +90,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind { ;CHECK: vld2Qi32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -101,20 +101,20 @@ define <4 x float> @vld2Qf(float* %A) nounwind { 
;CHECK: vld2Qf: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly diff 
--git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 65a24486bc62c..92538c34f5b8e 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;CHECK: vld3i16: ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind { ;CHECK: vld3i32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld3f(float* %A) nounwind { ;CHECK: vld3f: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ;CHECK: vld3i64: ;CHECK: vld1.64 %tmp0 
= bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;CHECK: vld3.8 ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind { ;CHECK: vld3.16 ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x3_t 
%tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index e0b870638a182..d1bf957ebadca 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;CHECK: vld4.8 - %tmp1 = call 
%struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld4f(float* %A) nounwind { ;CHECK: vld4f: ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { ;CHECK: vld4i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 %tmp3 = extractvalue 
%struct.__neon_int64x1x4_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;CHECK: vld4.8 ;CHECK: vld4.8 - %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind { ;CHECK: vld4.16 ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld4Qf(float* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x4_t 
@llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index b32c59019f4cb..31ee64fa598f9 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -13,7 +13,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld2lanei8: ;CHECK: vld2.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x 
i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -25,7 +25,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -37,7 +37,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 %tmp5 = add <2 x i32> %tmp3, %tmp4 @@ -49,7 +49,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 %tmp5 = fadd <2 x float> %tmp3, %tmp4 @@ -61,7 +61,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast 
i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -73,7 +73,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -85,21 +85,21 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare 
%struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -114,7 +114,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld3lanei8: ;CHECK: vld3.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 %tmp5 = 
extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 @@ -128,7 +128,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 @@ -142,7 +142,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 @@ -156,7 +156,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 @@ -170,7 +170,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x 
i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 @@ -184,7 +184,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 @@ -198,7 +198,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 @@ -207,14 +207,14 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp7 } -declare %struct.__neon_int8x8x3_t 
@llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @@ -229,7 +229,7 @@ define <8 x i8> 
@vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8: ;CHECK: vld4.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 @@ -245,7 +245,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 @@ -261,7 +261,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 @@ -277,7 +277,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = 
bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 @@ -293,7 +293,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 @@ -309,7 +309,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 @@ -325,7 +325,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = 
bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 @@ -336,11 +336,11 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp9 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x4_t 
@llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll index 77cf10ad3e684..9c6b210be7976 100644 --- a/test/CodeGen/ARM/vmla.ll +++ b/test/CodeGen/ARM/vmla.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) 
nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> 
%tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlal_lanes16 ; CHECK: vmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlal_lanes32 ; CHECK: vmlal.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlal_laneu16 ; CHECK: vmlal.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; 
<<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlal_laneu32 ; CHECK: vmlal.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll index 2b70a7878ced5..65e7fe41bb3a2 100644 --- a/test/CodeGen/ARM/vmls.ll +++ b/test/CodeGen/ARM/vmls.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = 
load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = 
sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlsl_lanes16 ; CHECK: vmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> 
%arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlsl_lanes32 ; CHECK: vmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlsl_laneu16 ; CHECK: vmlsl.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlsl_laneu32 ; CHECK: vmlsl.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> 
%1, %2 + %4 = sub <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index 5e872ab6d0b25..8cd94576b0c21 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -192,7 +192,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { ;CHECK: vmovls8: ;CHECK: vmovl.s8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1) + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -200,7 +200,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { ;CHECK: vmovls16: ;CHECK: vmovl.s16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1) + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -208,7 +208,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { ;CHECK: vmovls32: ;CHECK: vmovl.s32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1) + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } @@ -216,7 +216,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { ;CHECK: vmovlu8: ;CHECK: vmovl.u8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1) + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -224,7 +224,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { ;CHECK: vmovlu16: ;CHECK: 
vmovl.u16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1) + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -232,23 +232,15 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { ;CHECK: vmovlu32: ;CHECK: vmovl.u32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1) + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } -declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone - define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { ;CHECK: vmovni16: ;CHECK: vmovn.i16 %tmp1 = load <8 x i16>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1) + %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 } @@ -256,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { ;CHECK: vmovni32: ;CHECK: vmovn.i32 %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1) + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 } @@ -264,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { ;CHECK: vmovni64: ;CHECK: vmovn.i64 %tmp1 = load <2 x i64>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1) + %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone - define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { ;CHECK: vqmovns16: 
;CHECK: vqmovn.s16 diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 1d91680212794..5383425018f8e 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -152,8 +152,10 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -161,8 +163,10 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -170,8 +174,10 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -179,8 +185,10 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul 
<8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -188,8 +196,10 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -197,8 +207,10 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -215,8 +227,10 @@ entry: ; CHECK: test_vmull_lanes16 ; CHECK: vmull.s16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { @@ -224,8 +238,10 @@ entry: ; CHECK: test_vmull_lanes32 ; CHECK: vmull.s32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> 
%0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { @@ -233,8 +249,10 @@ entry: ; CHECK: test_vmull_laneu16 ; CHECK: vmull.u16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { @@ -242,16 +260,10 @@ entry: ; CHECK: test_vmull_laneu32 ; CHECK: vmull.u32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } -declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, 
<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index deed554d842c6..e1fe64b02d9d3 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ret <16 x i8> %tmp2 } + +; Undef shuffle indices should not prevent matching to VREV: + +define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { +;CHECK: test_vrev64D8_undef: +;CHECK: vrev64.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %tmp2 +} + +define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { +;CHECK: test_vrev32Q16_undef: +;CHECK: vrev32.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> + ret <8 x i16> %tmp2 +} diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 95414c3089141..2b535ada30728 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst1i8: ;CHECK: vst1.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A 
to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1) + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1Qi8: ;CHECK: vst1.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) ret void } @@ -75,7 +75,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1) + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) ret void } @@ -84,18 +84,18 @@ define 
void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <2 x i64>* %B - call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 3c98a2cbe60dd..aed15fd51c56b 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2i8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1) 
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;CHECK: vst2.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void 
@llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -75,17 +75,17 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x 
i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index 2599bc0db933b..1feaed5a10445 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) 
nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst3.8 ;CHECK: vst3.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x 
i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index 878f0efaa4807..d302f097fc1fd 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4i8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind 
{ ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4.8 ;CHECK: vst4.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind 
{ ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 
x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index cf50756d465e5..30ec52ac64209 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -4,7 +4,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x 
i32>* %B - call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -58,24 +58,24 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3) + call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1) ret void } -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x 
float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3lanei8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -84,7 +84,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -93,7 +93,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> 
%tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -102,7 +102,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -111,7 +111,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6) + call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1) ret void } @@ -120,7 +120,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0) + call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) ret void } @@ -129,25 +129,25 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2f32(i8*, 
<2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4lanei8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -156,7 +156,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -165,7 +165,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 
= load <2 x i32>* %B - call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -174,7 +174,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -183,7 +183,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7) + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1) ret void } @@ -192,7 +192,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -201,15 +201,15 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x 
float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 3416de76f123e..df77bb31fc8b8 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -157,8 +157,10 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> 
%tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x 
i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubws8: ;CHECK: vsubw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* 
%B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> 
@llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll index 10bb10ac24a17..b1c2f93b47c6c 100644 --- a/test/CodeGen/ARM/vtrn.ll +++ b/test/CodeGen/ARM/vtrn.ll @@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VTRN: + +define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtrni8_undef: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtrnQi16_undef: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 6cef188d76dd4..9130f628919a5 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> 
%tmp5 } + +; Undef shuffle indices should not prevent matching to VUZP: + +define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vuzpi8_undef: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vuzpQi16_undef: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index a9ecdcab42d7e..926970aeb29b4 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VZIP: + +define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vzipi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, 
%tmp4 + ret <8 x i8> %tmp5 +} + +define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vzipQi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll new file mode 100644 index 0000000000000..b838ec949eaed --- /dev/null +++ b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=alpha | FileCheck %s + +define fastcc i64 @getcount(i64 %s) { + %tmp431 = mul i64 %s, 12884901888 + ret i64 %tmp431 +} + +; CHECK: sll $16,33,$0 +; CHECK-NEXT: sll $16,32,$1 +; CHECK-NEXT: addq $0,$1,$0 + diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll new file mode 100644 index 0000000000000..743292a58d591 --- /dev/null +++ b/test/CodeGen/CellSPU/arg_ret.ll @@ -0,0 +1,33 @@ +; Test parameter passing and return values +;RUN: llc --march=cellspu %s -o - | FileCheck %s + +; this fits into registers r3-r74 +%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} +define ccc i32 @test_regs( %paramstruct %prm ) +{ +;CHECK: lr $3, $74 +;CHECK: bi $lr + %1 = extractvalue %paramstruct %prm, 71 + ret i32 %1 +} + +define ccc i32 
@test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) +{ +;CHECK-NOT: a $3, $74, $75 + %1 = extractvalue %paramstruct %prm, 71 + %2 = add i32 %1, %stackprm + ret i32 %2 +} + +define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) +{ +;CHECK: lqd $75, 80($sp) +;CHECK: lr $3, $4 + ret %paramstruct %prm +} + diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll index 5483f463732ba..63293e2aecb14 100644 --- a/test/CodeGen/CellSPU/bigstack.ll +++ b/test/CodeGen/CellSPU/bigstack.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 4 -; RUN: grep il %t1.s | grep -v file | count 7 -; RUN: grep stqx %t1.s | count 2 +; RUN: grep lqx %t1.s | count 3 +; RUN: grep il %t1.s | grep -v file | count 5 +; RUN: grep stqx %t1.s | count 1 define i32 @bigstack() nounwind { entry: diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll index eb7cf2c6467c8..559b266e59df3 100644 --- a/test/CodeGen/CellSPU/call.ll +++ b/test/CodeGen/CellSPU/call.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s ; RUN: grep brsl %t1.s | count 1 -; RUN: grep brasl %t1.s | count 1 -; RUN: grep stqd %t1.s | count 80 +; RUN: grep brasl %t1.s | count 2 +; RUN: grep stqd %t1.s | count 82 ; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -29,3 +29,25 @@ define i32 @stub_2(...) 
{ entry: ret i32 0 } + +; check that struct is passed in r3-> +; assert this by changing the second field in the struct +%0 = type { i32, i32, i32 } +declare %0 @callee() +define %0 @test_structret() +{ +;CHECK: stqd $lr, 16($sp) +;CHECK: stqd $sp, -48($sp) +;CHECK: ai $sp, $sp, -48 +;CHECK: brasl $lr, callee + %rv = call %0 @callee() +;CHECK: ai $4, $4, 1 +;CHECK: lqd $lr, 64($sp) +;CHECK: ai $sp, $sp, 48 +;CHECK: bi $lr + %oldval = extractvalue %0 %rv, 1 + %newval = add i32 %oldval,1 + %newrv = insertvalue %0 %rv, i32 %newval, 1 + ret %0 %newrv +} + diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index d94d77c9f1423..141361d5702b7 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -12,7 +12,7 @@ ; RUN: grep rotqby %t2.s | count 5 ; RUN: grep lqd %t2.s | count 13 ; RUN: grep ilhu %t2.s | count 2 -; RUN: grep ai %t2.s | count 8 +; RUN: grep ai %t2.s | count 9 ; RUN: grep dispatch_tab %t2.s | count 6 ; ModuleID = 'call_indirect.bc' diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll index 04accb9c56b8d..f37d2ae89b003 100644 --- a/test/CodeGen/CellSPU/shuffles.ll +++ b/test/CodeGen/CellSPU/shuffles.ll @@ -16,3 +16,26 @@ define <4 x float> @splat(float %param1) { ret <4 x float> %val } +define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { + %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 +;CHECK: lqa $6, +;CHECK: shufb $4, $4, $5, $6 + %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 + +;CHECK: cdd $5, 0($3) +;CHECK: lqd $6, 0($3) +;CHECK: shufb $4, $4, $6, $5 +;CHECK: stqd $4, 0($3) +;CHECK: bi $lr + store <2 x float> %sl2_17, <2 x float>* %ptr + ret void +} + +define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { +;CHECK: cwd $5, 4($sp) +;CHECK: shufb $3, $4, $3, $5 +;CHECK: bi $lr + %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 + ret <4 x 
float> %rv +} + diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll new file mode 100644 index 0000000000000..b81c0cdbb2994 --- /dev/null +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -0,0 +1,75 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x float> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: fa {{\$.}}, $3, $3 + %1 = fadd %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: fs {{\$.}}, $3, $3 + %1 = fsub %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: fm {{\$.}}, $3, $3 + %1 = fmul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_splat(float %param ) { +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x float> undef, float %param, i32 0 + %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_store(%vec %val, %vec* %ptr){ + +;CHECK: stqd + store %vec undef, %vec* null + +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} + +define %vec @test_insert(){ +;CHECK: cwd +;CHECK: shufb $3 + %rv = insertelement %vec undef, float 0.0e+00, i32 undef +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_unaligned_store() { +;CHECK: cdd $3, 8($3) +;CHECK: lqd +;CHECK: shufb +;CHECK: stqd + %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] + %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1] + %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] + store <2 x float> undef, <2 x float>* %vptr + ret void +} + diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll new file mode 100644 index 0000000000000..dd51be5a71d2e --- /dev/null +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -0,0 +1,64 @@ +;RUN: llc --march=cellspu %s -o - | 
FileCheck %s +%vec = type <2 x i32> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: a {{\$.}}, $3, $3 + %1 = add %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: sf {{\$.}}, $4, $3 + %1 = sub %vec %param, <i32 1, i32 1> + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: mpyu +;CHECK: mpyh +;CHECK: a {{\$., \$., \$.}} +;CHECK: a {{\$., \$., \$.}} + %1 = mul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define <2 x i32> @test_splat(i32 %param ) { +;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the +; somewhat redundant: +;CHECK-NOT or $3, $3, $3 +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x i32> undef, i32 %param, i32 0 + %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret <2 x i32> %rv +} + +define i32 @test_extract() { +;CHECK: shufb $3 + %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1] +;CHECK: bi $lr + ret i32 %rv +} + +define void @test_store( %vec %val, %vec* %ptr) +{ +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} diff --git a/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll new file mode 100644 index 0000000000000..a2945aaec331d --- /dev/null +++ b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s + +define float @test1() +{ + ret float extractelement (<2 x float> bitcast (<1 x double> <double 0x3f800000> to <2 x float>), i32 1); +} diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll index 8e7b70e2216f5..9d8e391f874ee 100644 --- a/test/CodeGen/Mips/2008-06-05-Carry.ll +++ b/test/CodeGen/Mips/2008-06-05-Carry.ll @@ -4,7 +4,7 @@ target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i64 @add64(i64 %u, i64 %v) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll index b2aaa00754b70..b1d20d93f1871 100644 --- a/test/CodeGen/Mips/2008-07-03-SRet.ll +++ b/test/CodeGen/Mips/2008-07-03-SRet.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.sret0 = type { i32, i32, i32 } define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind { diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll index 6bb6bd862b25e..a1f05044b6c6c 100644 --- a/test/CodeGen/Mips/2008-07-05-ByVal.ll +++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.byval0 = type { i32, i32 } define i64 @test0(%struct.byval0* byval %b, i64 %sum) nounwind { diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll index 808ce16910eee..ecd8521027afb 100644 --- a/test/CodeGen/Mips/2008-07-06-fadd64.ll +++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __adddf3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = 
"mipsallegrexel-unknown-psp-elf" define double @dofloat(double %a, double %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll index 7ac0f5f840db1..681788e98196f 100644 --- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll +++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __extendsfdf2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @dofloat(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll index ca996367733e4..d804c7dcf317f 100644 --- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll +++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep trunc.w.s | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @fptoint(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll index 20de18a0164c8..b8b4c5c610de9 100644 --- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll +++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll @@ -5,7 +5,7 @@ ; RUN: grep __fixunsdfsi %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @int2fp(i32 %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll index 
f6b2045444a57..bda4a3172f309 100644 --- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll +++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll @@ -6,7 +6,7 @@ ; RUN: not grep {gp_rel} %t target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" @.str = internal constant [10 x i8] c"AAAAAAAAA\00" @i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll index 26eb4db26d4d4..91efd68622a2e 100644 --- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll +++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll @@ -10,7 +10,7 @@ ; RUN: grep {\%lo} %t1 | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.anon = type { i32, i32 } @s0 = global [8 x i8] c"AAAAAAA\00", align 4 diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll index 59599b399c291..41ae5dd65f516 100644 --- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll +++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll @@ -3,7 +3,7 @@ ; RUN: grep seb %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i8 @A(i8 %e.0, i8 signext %sum) signext nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll index 21ff960054213..20bd88889061f 100644 --- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll +++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll @@ -2,7 +2,7 @@ ; RUN: grep {CPI\[01\]_\[01\]:} %t 
| count 2 ; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @F(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index 80101fa25b3ef..ca837ffd2a50b 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -3,7 +3,7 @@ ; RUN: grep {bc1\[tf\]} %t | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index 042cad60e2b04..52a4b081ddb38 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b, i32 %j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll index 77680bccf9765..47382f989ca42 100644 --- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll +++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll @@ -3,7 +3,7 @@ ; RUN: grep neg.s %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %i, float 
%j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll index cd35ccaee83d5..23ed64a96d8e8 100644 --- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll +++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll @@ -4,7 +4,7 @@ ; RUN: grep multu %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.DWstruct = type { i32, i32 } define i32 @A0(i32 %u, i32 %v) nounwind { diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll index 2f33e9bea73f8..0fc45f7d1b05f 100644 --- a/test/CodeGen/Mips/2008-08-03-fabs64.ll +++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll @@ -3,7 +3,7 @@ ; RUN: grep {ori.*65535} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @A(double %c, double %d) nounwind readnone { entry: diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll index ca90b500f0506..f8eb028559792 100644 --- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll +++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll @@ -3,7 +3,7 @@ ; RUN: grep mfc1 %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll index 79e49a3d682e5..7be7974e0ffe8 100644 --- a/test/CodeGen/Mips/2008-08-06-Alloca.ll +++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | 
grep {subu.*sp} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @twoalloca(i32 %size) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll index 54d454cc3aded..63c25951423a0 100644 --- a/test/CodeGen/Mips/2008-08-07-CC.ll +++ b/test/CodeGen/Mips/2008-08-07-CC.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define internal fastcc i32 @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll index f3bb965cdb69e..67f86d7411417 100644 --- a/test/CodeGen/Mips/2008-08-07-FPRound.ll +++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @round2float(double %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll index 1da1db24bf5a0..fb3332329d6c8 100644 --- a/test/CodeGen/Mips/2008-08-08-ctlz.ll +++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep clz | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @A0(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll 
b/test/CodeGen/Mips/2010-07-20-Select.ll new file mode 100644 index 0000000000000..8b7f9a919378c --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Select.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s +; Fix PR7473 + +define i32 @main() nounwind readnone { +entry: + %a = alloca i32, align 4 ; <i32*> [#uses=2] + %c = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 1, i32* %a, align 4 + volatile store i32 0, i32* %c, align 4 + %0 = volatile load i32* %a, align 4 ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] +; CHECK: addiu $4, $zero, 3 + %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; <i32> [#uses=1] + %2 = volatile load i32* %c, align 4 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] +; CHECK: addu $4, $zero, $3 +; CHECK: addu $2, $5, $4 + %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; <i32> [#uses=1] + %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; <i32> [#uses=1] + ret i32 %4 +} diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll new file mode 100644 index 0000000000000..07fc10cae1802 --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s + +define i32 @main() nounwind readnone { +entry: + %x = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 2, i32* %x, align 4 + %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1] +; CHECK: lui $3, %hi($JTI0_0) +; CHECK: sll $2, $2, 2 +; CHECK: addiu $3, $3, %lo($JTI0_0) + switch i32 %0, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb1: ; preds = %entry + ret i32 2 + +; CHECK: $BB0_2 +bb2: ; preds = %entry + ret i32 0 + +bb3: ; preds = %entry + ret i32 3 + +bb4: ; preds = %entry + ret i32 4 + +bb5: ; preds = %entry + ret i32 1 +} diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll deleted file mode 100644 index 
db2ab877ff7d2..0000000000000 --- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=ppc32 | grep nop -target triple = "powerpc-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll new file mode 100644 index 0000000000000..3a2907d5d7b9d --- /dev/null +++ b/test/CodeGen/PowerPC/empty-functions.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: nop diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll index 32c6f4809cb49..399f19f8d2e29 100644 --- a/test/CodeGen/PowerPC/vec_constants.ll +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI -define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { +define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] %tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] store <4 x i32> %tmp4, <4 x i32>* %P1 @@ -15,26 +15,30 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { ret void } -define <4 x i32> @test_30() { +define <4 x i32> @test_30() nounwind { ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > } -define <4 x i32> @test_29() { +define <4 x i32> @test_29() nounwind { ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > } -define <8 x i16> @test_n30() { +define <8 x i16> @test_n30() nounwind { ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > } -define <16 x i8> 
@test_n104() { +define <16 x i8> @test_n104() nounwind { ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > } -define <4 x i32> @test_vsldoi() { +define <4 x i32> @test_vsldoi() nounwind { ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > } -define <4 x i32> @test_rol() { +define <8 x i16> @test_vsldoi_65023() nounwind { + ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} + +define <4 x i32> @test_rol() nounwind { ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > } diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll index cf12063e5d4c3..eabeb0a422546 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {st %} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll index 1e6232a625508..53bb641cf1eba 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {sth.%} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll index e0bc302c73141..ac6067abbee0f 100644 --- a/test/CodeGen/SystemZ/07-BrUnCond.ll +++ b/test/CodeGen/SystemZ/07-BrUnCond.ll @@ -1,7 
+1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo() noreturn nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll index 27189ab41567c..30810ce6eb90d 100644 --- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll +++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo(i64 %N) nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll index 6e0c1ab2c1657..50a26e2a451a3 100644 --- a/test/CodeGen/SystemZ/09-Globals.ll +++ b/test/CodeGen/SystemZ/09-Globals.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | grep larl | count 3 target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @bar = common global i64 0, align 8 ; <i64*> [#uses=3] define i64 @foo() nounwind readonly { diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll index cc325389d787c..f291e5ff42b62 100644 --- a/test/CodeGen/SystemZ/10-FuncsPic.ll +++ b/test/CodeGen/SystemZ/10-FuncsPic.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -relocation-model=pic | grep PLT | count 1 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @ptr = external global void (...)* ; <void (...)**> [#uses=2] define void @foo1() nounwind { diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll index a77671e2ba7b9..c581ad9c45789 100644 --- a/test/CodeGen/SystemZ/10-GlobalsPic.ll +++ 
b/test/CodeGen/SystemZ/10-GlobalsPic.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @src = external global i32 ; <i32*> [#uses=2] @dst = external global i32 ; <i32*> [#uses=2] @ptr = external global i32* ; <i32**> [#uses=2] diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll index 609d9dcf59c59..b170a8044a9d3 100644 --- a/test/CodeGen/SystemZ/11-BSwap.ll +++ b/test/CodeGen/SystemZ/11-BSwap.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i16 @foo(i16 zeroext %a) zeroext { diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll index 07a164d42645b..54424e18f68b0 100644 --- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll +++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=systemz | grep rll target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll index 99d0ee7b03d97..89b22251eb23b 100644 --- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll +++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind { entry: diff --git 
a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll index a35167fba04f4..68ccb848980cd 100644 --- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll +++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind { entry: diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index b37f7e92d5fb4..98feb83231dc1 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll index 5457b12afcba0..f4e176eb4421f 100644 --- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll +++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define float @foo(i32 signext %a) { entry: diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll index a91e29ea4f9d9..63fd8553b32e7 100644 --- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll +++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll @@ -1,7 +1,7 @@ ; RUN: 
llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @dfg_parse() nounwind { entry: diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll index 2074bfd5d7b9e..929c472d1ef62 100644 --- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll +++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s | not grep r11 -target triple = "thumb-linux-gnueabi" +target triple = "thumb-unknown-linux-gnueabi" %struct.__sched_param = type { i32 } %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } @i.1882 = internal global i32 1 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll new file mode 100644 index 0000000000000..9a6321bb43c4d --- /dev/null +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -0,0 +1,147 @@ +; RUN: llc -mtriple=thumbv6-apple-darwin10 < %s | FileCheck %s +; RUN: opt -strip-debug < %s | llc -mtriple=thumbv6-apple-darwin10 | FileCheck %s +; Stripping out debug info formerly caused the last two multiplies to be emitted in +; the other order. 7797940 (part of it dated 6/29/2010..7/15/2010). 
+ +%0 = type { [3 x double] } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%0*, i32, i32)* @_Z19getClosestDiagonal3ii to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { +; CHECK: blx ___muldf3 +; CHECK: blx ___muldf3 +; CHECK: beq LBB0_8 +; CHECK: blx ___muldf3 +; <label>:3 + switch i32 %1, label %4 [ + i32 0, label %5 + i32 3, label %5 + ] + +; <label>:4 ; preds = %3 + br label %5, !dbg !0 + +; <label>:5 ; preds = %4, %3, %3 + %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1] + %v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1] + %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0 + %v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1] + %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1] + %v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1] + %v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3] + %v_17 = fmul double %storemerge, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_17, double* %v_8, align 4, !dbg !97 + %v_19 = fmul double %storemerge2, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_19, double* %v_10, align 4, !dbg !97 + ret void, !dbg !98 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare double @sqrt(double) nounwind readonly + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 46, i32 0, metadata !1, null} +!1 = metadata !{i32 
524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !22, metadata !22} +!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, 
metadata !84, metadata !85, metadata !88, metadata !89, metadata !90} +!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] +!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] +!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ] +!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{null, metadata !19, metadata !20} +!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ] +!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ] +!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 524309, 
metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{null, metadata !19} +!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ] +!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13} +!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ] +!32 = metadata !{metadata !13, metadata !33} +!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] +!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", 
metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ] +!39 = metadata !{metadata !40, metadata !19} +!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ] +!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ] +!45 = metadata !{null, metadata !19, metadata !13} +!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", 
metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ] +!50 = metadata !{null, metadata !19, metadata !51} +!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ] +!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ] +!56 = metadata !{metadata !51, metadata !33} +!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ 
DW_TAG_subprogram ] +!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ] +!59 = metadata !{metadata !8, metadata !33} +!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ] +!62 = metadata !{metadata !13, metadata !33, metadata !22} +!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ] +!65 = metadata !{metadata !40, metadata !19, metadata !22} +!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ] +!68 = metadata !{metadata !69, metadata !19, metadata !51} +!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ] +!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, 
metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ] +!73 = metadata !{metadata !69, metadata !19, metadata !13} +!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!75 = metadata !{i32 524334, i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, 
i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ] +!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13} +!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ] +!87 = metadata !{metadata !22, metadata !33} +!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, 
null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] +!92 = metadata !{i32 48, i32 0, metadata !1, null} +!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96} +!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!96 = metadata !{i32 51, i32 0, metadata !1, null} +!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} +!98 = metadata !{i32 52, i32 0, metadata !1, null} diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll new file mode 100644 index 0000000000000..c611b865f67d8 --- /dev/null +++ b/test/CodeGen/Thumb/barrier.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; V6: t1: +; V6: blx {{_*}}sync_synchronize + +; V6M: t1: +; V6M: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; V6: t2: +; V6: blx {{_*}}sync_synchronize + +; V6M: t2: +; V6M: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, 
i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index acfdc917ddf07..5c8ad974bc0e6 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=thumb | not grep {ldr sp} ; RUN: llc < %s -mtriple=thumb-apple-darwin | \ ; RUN: not grep {sub.*r7} -; RUN: llc < %s -march=thumb | grep 4294967280 +; RUN: llc < %s -march=thumb | grep {mov.*r6, sp} %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 02de36af1cc7f..b289484f5efbe 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -1,20 +1,35 @@ -; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5 +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s define void @test1() { +; CHECK: test1: +; CHECK: sub sp, #256 +; CHECK: add sp, #256 %tmp = alloca [ 64 x i32 ] , align 4 ret void } define void @test2() { +; CHECK: test2: +; CHECK: ldr r0, LCPI +; CHECK: add sp, r0 +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } define i32 @test3() { - %retval = alloca i32, align 4 - %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 - store i32 0, i32* %tmp - %tmp1 = load i32* %tmp - ret i32 %tmp1 +; CHECK: test3: +; CHECK: ldr r2, LCPI +; CHECK: add sp, r2 +; CHECK: ldr r1, LCPI +; CHECK: add r1, sp +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 } diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll index 16a9c4442d8a8..c2ba208e4ae27 100644 --- a/test/CodeGen/Thumb/vargs.ll +++ 
b/test/CodeGen/Thumb/vargs.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=thumb -; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1 +; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2 ; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2 @str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index 98a5263c2f99b..45d356c3dc676 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -11,8 +11,8 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq -; CHECK-NEXT: subeq.w r0, r6, r8 -; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc} +; CHECK-NEXT: subeq r0, r6, r7 +; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll index 3f1b9eb8d9d09..2246de35e03cd 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -7,17 +7,12 @@ define void @t() nounwind ssp { entry: ; CHECK: t: -; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 -; Yes, this is stupid codegen, but it's correct. 
-; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 %size = mul i32 8, 2 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_a = alloca i8, i32 %size, align 8 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_b = alloca i8, i32 %size, align 8 unreachable } diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll new file mode 100644 index 0000000000000..abcf13a3e38f9 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s + +@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1] + +define internal fastcc i32 @Callee(i32 %i) nounwind { +entry: +; CHECK: Callee: + %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1] + br i1 %0, label %bb2, label %bb + +bb: ; preds = %entry + %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1] + %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2] + %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0] + %3 = load i8* %.sub, align 4 ; <i8> [#uses=1] + %4 = sext i8 %3 to i32 ; <i32> [#uses=1] + ret i32 %4 + +bb2: ; preds = %entry +; Must restore sp from fp here +; CHECK: mov sp, r7 +; CHECK: sub sp, #8 +; CHECK: pop + ret i32 0 +} + +declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) 
nounwind + +define i32 @main() nounwind { +; CHECK: main: +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2] + %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1] + %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1] + %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2] + %3 = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb +; No need to restore sp from fp here. +; CHECK: printf +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub sp, #12 +; CHECK: pop + %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll new file mode 100644 index 0000000000000..22473bb35a0ae --- /dev/null +++ b/test/CodeGen/Thumb2/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: movs r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: lsrs r1, r1, #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: lsrs r2, r0, #7 +; CHECK: mov r0, 
r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll new file mode 100644 index 0000000000000..f7ec5a3b577c3 --- /dev/null +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 + + +define float @foo(float %a, float %b) { +entry: +; CHECK: foo +; CORTEXM3: blx ___mulsf3 +; CORTEXM4: vmul.f32 s0, s1, s0 +; CORTEXA8: vmul.f32 d0, d1, d0 + %0 = fmul float %a, %b + ret float %0 +} + +define double @bar(double %a, double %b) { +entry: +; CHECK: bar + %0 = fmul double %a, %b +; CORTEXM3: blx ___muldf3 +; CORTEXM4: blx ___muldf3 +; CORTEXA8: vmul.f64 d0, d1, d0 + ret double %0 +} diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll index 87af9d10572b6..d8b51ec82dedd 100644 --- a/test/CodeGen/Thumb2/crash.ll +++ b/test/CodeGen/Thumb2/crash.ll @@ -14,11 +14,11 @@ entry: %6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1] %7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1] %8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7) + tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind @sbuf = common global [16 x i32] 
zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5] @dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2] @@ -44,6 +44,6 @@ bb2: ; preds = %bb %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind + tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind ret i32 0 } diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll index 0cddd489fb469..e63a115273ffe 100644 --- a/test/CodeGen/Thumb2/div.ll +++ b/test/CodeGen/Thumb2/div.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMB -; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \ +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M define i32 @f1(i32 %a, i32 %b) { diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 29b8e75cb8b3b..650d788cb4d20 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,7 +22,7 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #28] +; CHECK: ldr.w {{(r[0-9])|(lr)}}, [r7, #28] %xgaps.i = alloca [32 x %union.rec*], align 4 
; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br label %bb20 @@ -46,9 +46,9 @@ bb119: ; preds = %bb20, %bb20 bb420: ; preds = %bb20, %bb20 ; CHECK: bb420 -; CHECK: str r{{[0-7]}}, [sp] -; CHECK: str r{{[0-7]}}, [sp, #4] -; CHECK: str r{{[0-7]}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8] ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7fa782f91de9a..ad957a1fcb45f 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -21,8 +21,8 @@ entry: bb: ; preds = %bb, %entry ; CHECK: LBB0_1: ; CHECK: cmp r2, #0 -; CHECK: sub.w r9, r2, #1 -; CHECK: mov r2, r9 +; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1 +; CHECK: mov r2, [[REGISTER]] %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll new file mode 100644 index 0000000000000..fde2ee0ab0c9a --- /dev/null +++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s +; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375. +; Eventually this should become the default and be moved into machine-licm.ll. +; FIXME: the vdup should be hoisted out of the loop, 8248029. 
+ +define void @t2(i8* %ptr1, i8* %ptr2) nounwind { +entry: +; CHECK: t2: +; CHECK: mov.w r3, #1065353216 + br i1 undef, label %bb1, label %bb2 + +bb1: +; CHECK-NEXT: %bb1 +; CHECK: vdup.32 q1, r3 + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tmp1 = shl i32 %indvar, 2 + %gep1 = getelementptr i8* %ptr1, i32 %tmp1 + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) + %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) + %gep2 = getelementptr i8* %ptr2, i32 %tmp1 + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) + %indvar.next = add i32 %indvar, 1 + %cond = icmp eq i32 %indvar.next, 10 + br i1 %cond, label %bb2, label %bb1 + +bb2: + ret void +} + +; CHECK-NOT: LCPI1_0: +; CHECK: .subsections_via_symbols + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index cdb3041b3beab..b949b2f30506d 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -64,10 +64,10 @@ bb1: %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 - %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1) + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) %gep2 = getelementptr i8* %ptr2, i32 %tmp1 - call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3) + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) %indvar.next = add i32 
%indvar, 1 %cond = icmp eq i32 %indvar.next, 10 br i1 %cond, label %bb2, label %bb1 @@ -79,8 +79,8 @@ bb2: ; CHECK: LCPI1_0: ; CHECK: .section -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll index 76c56d00473d3..7b0432de9bb5e 100644 --- a/test/CodeGen/Thumb2/thumb2-and2.ll +++ b/test/CodeGen/Thumb2/thumb2-and2.ll @@ -30,7 +30,7 @@ define i32 @f4(i32 %a) { ret i32 %tmp } ; CHECK: f4: -; CHECK: and r0, r0, #1448498774 +; CHECK: bic r0, r0, #-1448498775 ; 66846720 = 0x03fc0000 define i32 @f5(i32 %a) { diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll new file mode 100644 index 0000000000000..4df06b836fc57 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +define void @b(i32 %x) nounwind optsize { +entry: +; CHECK: b +; CHECK: mov r2, sp +; CHECK: mls r0, r0, r1, r2 +; CHECK: mov sp, r0 + %0 = mul i32 %x, 24 ; <i32> [#uses=1] + %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] + call arm_aapcscc void @a(i8* %vla) nounwind optsize + ret void +} + +declare void @a(i8*) optsize diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll new file mode 100644 index 0000000000000..a54d09e629199 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-barrier.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; CHECK: t1: +; CHECK: dsb + call void @llvm.memory.barrier( i1 false, i1 
false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; CHECK: t2: +; CHECK: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll index 24502b0338c25..2e4da1b289b55 100644 --- a/test/CodeGen/Thumb2/thumb2-call-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN ; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll index d4773bb5809b0..63249f4cf1452 100644 --- a/test/CodeGen/Thumb2/thumb2-cmp.ll +++ b/test/CodeGen/Thumb2/thumb2-cmp.ll @@ -39,3 +39,17 @@ define i1 @f5(i32 %a) { %tmp = icmp eq i32 %a, 1114112 ret i1 %tmp } + +; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform. 
+; +; CHECK: f6: +; CHECK-NOT: cmp.w r0, #-2147483648 +; CHECK: bx lr +define i32 @f6(i32 %a) { + %tmp = icmp sgt i32 %a, 2147483647 + br i1 %tmp, label %true, label %false +true: + ret i32 2 +false: + ret i32 0 +} diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll index c024415477189..5315535db0456 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; XFAIL: * define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll index c8302df78f680..2e8bb1d609342 100644 --- a/test/CodeGen/Thumb2/thumb2-pack.ll +++ b/test/CodeGen/Thumb2/thumb2-pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ 
define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 3946371709d58..4f92c93338066 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> 
@llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}[{{.*}}, :128] ; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 1fa4e5c21dabf..2074f98cb608c 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -6,7 +6,7 @@ define i32 @test1(i32 %x) { ; ARMv7A: uxtb16 r0, r0 ; ARMv7M: test1 -; ARMv7M: and r0, r0, #16711935 +; ARMv7M: bic r0, r0, #-16711936 %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll index 2d7bd27d24bdc..35b0159d39c64 100644 --- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll +++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 | grep setnp -; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: not grep setnp define i32 @test(float %f) { diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll 
b/test/CodeGen/X86/2007-06-14-branchfold.ll deleted file mode 100644 index 2680b1543fbb4..0000000000000 --- a/test/CodeGen/X86/2007-06-14-branchfold.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp -; check that branch folding understands FP_REG_KILL is not a branch - -target triple = "i686-pc-linux-gnu" - %struct.FRAME.c34003a = type { float, float } -@report_E = global i8 0 ; <i8*> [#uses=0] - -define void @main() { -entry: - %FRAME.31 = alloca %struct.FRAME.c34003a, align 8 ; <%struct.FRAME.c34003a*> [#uses=4] - %tmp20 = call i32 @report__ident_int( i32 -50 ) ; <i32> [#uses=1] - %tmp2021 = sitofp i32 %tmp20 to float ; <float> [#uses=5] - %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond = or i1 %tmp23, %tmp26 ; <i1> [#uses=1] - br i1 %bothcond, label %bb, label %bb30 - -bb: ; preds = %entry - unwind - -bb30: ; preds = %entry - %tmp35 = call i32 @report__ident_int( i32 50 ) ; <i32> [#uses=1] - %tmp3536 = sitofp i32 %tmp35 to float ; <float> [#uses=4] - %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond226 = or i1 %tmp38, %tmp44 ; <i1> [#uses=1] - br i1 %bothcond226, label %bb47, label %bb49 - -bb47: ; preds = %bb30 - unwind - -bb49: ; preds = %bb30 - %tmp60 = fcmp ult float %tmp3536, %tmp2021 ; <i1> [#uses=1] - %tmp60.not = xor i1 %tmp60, true ; <i1> [#uses=1] - %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond227 = and i1 %tmp65, %tmp60.not ; <i1> [#uses=1] - br i1 %bothcond227, label %cond_true68, label %cond_next70 - -cond_true68: ; preds = %bb49 - unwind - -cond_next70: ; preds = %bb49 - %tmp71 = call i32 @report__ident_int( i32 -30 ) ; <i32> [#uses=1] - %tmp7172 = sitofp i32 %tmp71 to float ; <float> [#uses=3] - %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - 
%tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond228 = or i1 %tmp74, %tmp80 ; <i1> [#uses=1] - br i1 %bothcond228, label %bb83, label %bb85 - -bb83: ; preds = %cond_next70 - unwind - -bb85: ; preds = %cond_next70 - %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1 ; <float*> [#uses=3] - store float %tmp7172, float* %tmp90 - %tmp92 = call i32 @report__ident_int( i32 30 ) ; <i32> [#uses=1] - %tmp9293 = sitofp i32 %tmp92 to float ; <float> [#uses=7] - %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond229 = or i1 %tmp95, %tmp101 ; <i1> [#uses=1] - br i1 %bothcond229, label %bb104, label %bb106 - -bb104: ; preds = %bb85 - unwind - -bb106: ; preds = %bb85 - %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0 ; <float*> [#uses=2] - store float %tmp9293, float* %tmp111 - %tmp123 = load float* %tmp90 ; <float> [#uses=4] - %tmp125 = fcmp ult float %tmp9293, %tmp123 ; <i1> [#uses=1] - br i1 %tmp125, label %cond_next147, label %cond_true128 - -cond_true128: ; preds = %bb106 - %tmp133 = fcmp olt float %tmp123, %tmp2021 ; <i1> [#uses=1] - %tmp142 = fcmp ogt float %tmp9293, %tmp3536 ; <i1> [#uses=1] - %bothcond230 = or i1 %tmp133, %tmp142 ; <i1> [#uses=1] - br i1 %bothcond230, label %bb145, label %cond_next147 - -bb145: ; preds = %cond_true128 - unwind - -cond_next147: ; preds = %cond_true128, %bb106 - %tmp157 = fcmp ugt float %tmp123, -3.000000e+01 ; <i1> [#uses=1] - %tmp165 = fcmp ult float %tmp9293, -3.000000e+01 ; <i1> [#uses=1] - %bothcond231 = or i1 %tmp157, %tmp165 ; <i1> [#uses=1] - br i1 %bothcond231, label %bb168, label %bb169 - -bb168: ; preds = %cond_next147 - unwind - -bb169: ; preds = %cond_next147 - %tmp176 = fcmp ugt float %tmp123, 3.000000e+01 ; <i1> [#uses=1] - %tmp184 = fcmp ult float %tmp9293, 3.000000e+01 ; <i1> [#uses=1] - %bothcond232 = or i1 %tmp176, %tmp184 ; <i1> [#uses=1] - br i1 
%bothcond232, label %bb187, label %bb188 - -bb187: ; preds = %bb169 - unwind - -bb188: ; preds = %bb169 - %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 ) ; <float> [#uses=2] - %tmp194 = load float* %tmp90 ; <float> [#uses=1] - %tmp196 = fcmp ugt float %tmp194, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp196, label %bb207, label %cond_next200 - -cond_next200: ; preds = %bb188 - %tmp202 = load float* %tmp111 ; <float> [#uses=1] - %tmp204 = fcmp ult float %tmp202, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp204, label %bb207, label %bb208 - -bb207: ; preds = %cond_next200, %bb188 - unwind - -bb208: ; preds = %cond_next200 - %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 ) ; <float> [#uses=1] - %tmp214 = fcmp oge float %tmp212, %tmp192 ; <i1> [#uses=1] - %tmp217 = fcmp oge float %tmp192, 1.000000e+02 ; <i1> [#uses=1] - %tmp221 = or i1 %tmp214, %tmp217 ; <i1> [#uses=1] - br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock - -cond_true224: ; preds = %bb208 - call void @abort( ) noreturn - ret void - -UnifiedReturnBlock: ; preds = %bb208 - ret void -} - -declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x) - -declare i32 @report__ident_int(i32 %x) - -declare void @abort() noreturn diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll deleted file mode 100644 index b936686798f03..0000000000000 --- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=x86 | grep nop -target triple = "i686-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll index 99cb8569b3f42..99cb8569b3f42 100644 --- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll +++ 
b/test/CodeGen/X86/2008-08-06-CmpStride.ll diff --git a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll index 36cc535451037..36cc535451037 100644 --- a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll +++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 4a97ac35afc7f..bb01e5afceff1 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm} ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll index 8d426271a1946..28539307aa409 100644 --- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll +++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s ; Check the register copy comes after the call to f and before the call to g ; PR3784 diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll index da493d4910e16..b13d33eb3fd91 100644 --- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll +++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s ; Check that register copies in the landing pad come after the EH_LABEL declare i32 @f() diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index b5873bae5f05f..90dabb8ab635b 100644 --- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ 
b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t ; RUN: not grep {%rsp} %t ; RUN: not grep {%rbp} %t diff --git a/test/DebugInfo/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index 001f853dd236e..85ee091c34789 100644 --- a/test/DebugInfo/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -1,7 +1,4 @@ -; RUN: llc -O0 < %s | FileCheck %s -; ModuleID = 'try.c' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.8" +; RUN: llc -march=x86 -O0 < %s | FileCheck %s ; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will ; generate DWARF and this test will need to be modified or removed. diff --git a/test/DebugInfo/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll index 70103e5f72bdb..2113263c0ac3d 100644 --- a/test/DebugInfo/2010-02-01-DbgValueCrash.ll +++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll @@ -1,6 +1,5 @@ ; RUN: llc -O1 < %s ; ModuleID = 'pr6157.bc' -target triple = "x86_64-unknown-linux-gnu" ; formerly crashed in SelectionDAGBuilder %tart.reflect.ComplexType = type { double, double } diff --git a/test/DebugInfo/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 52e9484289523..d2115496f8f44 100644 --- a/test/DebugInfo/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 < %s -mtriple=x86_64-apple-darwin | grep debug_loc12 +; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12 ; Test to check .debug_loc support. This test case emits 13 debug_loc entries. 
%0 = type { double } diff --git a/test/DebugInfo/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index 80643d0792acf..80643d0792acf 100644 --- a/test/DebugInfo/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll diff --git a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index 812d3720d6f56..812d3720d6f56 100644 --- a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll new file mode 100644 index 0000000000000..be7d94c4f2914 --- /dev/null +++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mcpu=i486 +; PR7375 +; +; This function contains a block (while.cond) with a lonely RFP use that is +; not a kill. We still need an FP_REG_KILL for that block since the register +; allocator will insert a reload. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 { +entry: + %tmp2 = load double* %t ; <double> [#uses=1] + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %bb.nph + +while.cond: ; preds = %bb.nph, %while.cond + store double %tmp2, double* undef + br i1 undef, label %if.end, label %while.cond + +bb.nph: ; preds = %if.then + br label %while.cond + +if.end: ; preds = %while.cond, %if.then, %entry + ret void +} diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll new file mode 100644 index 0000000000000..3ac4cf5964c3b --- /dev/null +++ b/test/CodeGen/X86/2010-07-15-Crash.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null +; PR7653 + +@__FUNCTION__.1623 = 
external constant [4 x i8] ; <[4 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll new file mode 100644 index 0000000000000..96016cfe1c730 --- /dev/null +++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @extend2bit_v2(i32 %val) { +entry: + %0 = trunc i32 %val to i2 ; <i2> [#uses=1] + %1 = sext i2 %0 to i32 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + ret i32 %3 +} + +; CHECK: extend2bit_v2: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll new file mode 100644 index 0000000000000..1919d2ef34aec --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; PR7814 + +@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1] +@g_38 = global i32 0, align 4 ; <i32*> [#uses=2] +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %tmp = load i64* @g_16 ; <i64> [#uses=1] + %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1] + %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1] + %and = and i64 %conv, 150 ; <i64> [#uses=1] + %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1] + %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +; CHECK: andl $150 +; CHECK-NEXT: testb +; CHECK-NEXT: jg + +entry.if.end_crit_edge: ; preds = %entry + %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1] + br 
label %if.end + +if.then: ; preds = %entry + store i32 1, i32* @g_38 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1] + %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll new file mode 100644 index 0000000000000..98a0887c0e693 --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-pc-mingw32 + +define void @func() nounwind { +invoke.cont: + %call = tail call i8* @malloc() + %a = invoke i32 @bar() + to label %bb1 unwind label %lpad + +bb1: + ret void + +lpad: + %exn = tail call i8* @llvm.eh.exception() nounwind + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind + %ehspec.fails = icmp slt i32 %eh.selector, 0 + br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup + +cleanup: + tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind + unreachable + +ehspec.unexpected: + tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind + unreachable +} + +declare noalias i8* @malloc() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) 
nounwind + +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare void @__cxa_call_unexpected(i8*) + +declare i32 @bar() diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll new file mode 100644 index 0000000000000..d98ef14e108bb --- /dev/null +++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; CHECK: DW_TAG_constant +; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name + +define void @foo() nounwind ssp { +entry: + call void @bar(i32 201), !dbg !8 + ret void, !dbg !8 +} + +declare void @bar(i32) + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!5} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ] +!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] +!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 3, i32 14, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ 
DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll new file mode 100644 index 0000000000000..e5542baf2ee86 --- /dev/null +++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef. + +define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp { + %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3] + %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1] +; CHECK: movl (%r{{.*}}), % + %x4 = load i32* %h, align 4 ; <i32> [#uses=1] + +; The imull clobbers a 32-bit register. +; CHECK: imull %{{...}}, %e[[CLOBBER:..]] + %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1] + +; So we cannot use the corresponding 64-bit register anymore. 
+; CHECK-NOT: shrq $32, %r[[CLOBBER]] + %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1] + %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] + +; CHECK: idiv + %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1] + store i32 %x6, i32* %w, align 4 + ret void +} + +declare i64 @g(i8*, i8*) diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp index f2005891a59a8..629a147736150 100644 --- a/test/CodeGen/X86/GC/dg.exp +++ b/test/CodeGen/X86/GC/dg.exp @@ -1,3 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll new file mode 100644 index 0000000000000..728e377360189 --- /dev/null +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink} + +define fastcc void @t() nounwind ssp { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %bb + unreachable + +bb4: ; preds = %entry + br i1 undef, label %bb.nph, label %return + +bb.nph: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb9, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] + %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] + %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] + br i1 undef, label %bb9, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7, %bb6, %bb5 + br i1 undef, label %bb5, label %return + +return: ; preds = %bb9, %bb4, %bb + ret void +} diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll new file mode 100644 index 0000000000000..a72160be719ac --- /dev/null +++ b/test/CodeGen/X86/avx-128.ll @@ -0,0 +1,12 @@ +; RUN: 
llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@z = common global <4 x float> zeroinitializer, align 16 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vpxor + ; CHECK: vmovaps + store <4 x float> zeroinitializer, <4 x float>* @z, align 16 + ret void +} + diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll new file mode 100644 index 0000000000000..20d31e738857b --- /dev/null +++ b/test/CodeGen/X86/avx-256.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@x = common global <8 x float> zeroinitializer, align 32 +@y = common global <4 x double> zeroinitializer, align 32 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + ; CHECK: vmovaps + store <8 x float> zeroinitializer, <8 x float>* @x, align 32 + store <4 x double> zeroinitializer, <4 x double>* @y, align 32 + ret void +} + diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll new file mode 100644 index 0000000000000..9de90237d1468 --- /dev/null +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -0,0 +1,2587 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdec + %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdeclast + %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: 
vaesenc + %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenclast + %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) { + ; CHECK: vaesimc + %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) { + ; CHECK: vaeskeygenassist + %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsd + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordsd + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> 
[#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> 
%a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { + ; CHECK: vcvtpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { + ; CHECK: vcvtpd2ps + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 
@llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { + ; CHECK: vcvtsd2ss + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { + ; CHECK: vcvtss2sd + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { + ; CHECK: vcvttpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdivsd + %res = call <2 x double> 
@llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovdqu + %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovupd + %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly + + +define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { + ; CHECK: pushl + ; CHECK: movl + ; CHECK: vmaskmovdqu + ; CHECK: popl + call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) + ret void +} +declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxpd + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxsd + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminpd + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> 
@llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminsd + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: movl + ; CHECK: vmovntdq + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind + + +define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovntpd + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmulsd + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackssdw + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpacksswb + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] 
+ ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpackuswb + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddsb + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddsw + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddusb + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddusw + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpavgb + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x 
i16> %a0, <8 x i16> %a1) { + ; CHECK: vpavgw + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpeqb + %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpeqd + %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpeqw + %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpgtb + %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpgtd + %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpgtw + %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x 
i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddwd + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxsw + %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxub + %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminsw + %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminub + %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { + ; CHECK: vpmovmskb + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhw + %res = call <8 x i16> 
@llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhuw + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuludq + %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsadbw + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> 
@test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + 
+define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + 
+define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubsb + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubsw + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubusb + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubusw + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x 
i16>, <8 x i16>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { + ; CHECK: vsqrtsd + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: movl + ; CHECK: vmovq + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + +define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: movl + ; CHECK: vmovdqu + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + + +define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovupd + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vsubsd + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind 
readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x 
double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddsubps + %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhaddpd + %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhaddps + %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhsubpd + %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhsubps + %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vlddqu + %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly + + +define <2 x double> 
@test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vblendpd + %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vblendps + %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vblendvpd + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vblendvps + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdppd + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdpps + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret 
<4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertps + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovntdqa + %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly + + +define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vmpsadbw + %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackusdw + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpblendvb + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpblendw + %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x 
i16>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpeqq + %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { + ; CHECK: vphminposuw + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxsb + %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxsd + %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxud + %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxuw + %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminsb + %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) 
; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminsd + %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminud + %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminuw + %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { + ; CHECK: vpmovsxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { + ; CHECK: vpmovsxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { + ; CHECK: vpmovsxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { + ; CHECK: vpmovsxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; 
<<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { + ; CHECK: vpmovsxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { + ; CHECK: vpmovsxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { + ; CHECK: vpmovzxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { + ; CHECK: vpmovzxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { + ; CHECK: vpmovzxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { + ; CHECK: vpmovzxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { + ; CHECK: vpmovzxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind 
readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { + ; CHECK: vpmovzxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuldq + %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { + ; CHECK: vroundpd + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { + ; CHECK: vroundps + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> 
%a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vroundsd + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vroundss + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 
@test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpgtq + %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 
x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} 
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddss + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordps + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordss + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x 
float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdivss + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> 
[#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vldmxcsr + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + +define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxps + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxss + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminps + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminss + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; 
<i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovntps + call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmulss + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { + ; CHECK: vrcpps + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { + ; CHECK: vrcpss + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { + ; CHECK: vrsqrtss + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { + ; CHECK: vsqrtps + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> 
@llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { + ; CHECK: vsqrtss + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vstmxcsr + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovups + call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vsubss + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 
@llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { + ; CHECK: vpabsb + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { + ; CHECK: vpabsd + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { + ; CHECK: vpabsw + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddd + %res = call <4 x i32> 
@llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddsw + %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphaddw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphsubd + %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubsw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddubsw + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> 
[#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhrsw + %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshufb + %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsignb + %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsignd + %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsignw + %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x 
double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vaddsubps + %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vblendpd + %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vblendps + %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vblendvpd + %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vblendvps + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x 
double> %a1) { + ; CHECK: vcmpordpd + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vcmpordps + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { + ; CHECK: vcvtpd2psy + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvtpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] + ret <4 
x double> %res +} +declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvttpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vdpps + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhaddpd + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhaddps + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 
x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhsubpd + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhsubps + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { + ; CHECK: vlddqu + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly + + +define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { + ; CHECK: vmovdqu + %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { + ; CHECK: vmovupd + %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) { + ; CHECK: vmovups + %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x 
double>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: 
vmaskmovps + call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vmaxpd + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vmaxps + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vminpd + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vminps + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 
@llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { + ; CHECK: vmovntdq + call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind + + +define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovntpd + call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovntps + call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { + ; CHECK: vrcpps + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + 
+ +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { + ; CHECK: vroundpd + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { + ; CHECK: vroundps + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { + ; CHECK: vsqrtps + %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { + ; CHECK: vmovdqu + call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind + + +define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovupd + call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind + + +define void 
@test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovups + call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { + ; CHECK: vbroadcastsd + %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly + + +define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { + ; CHECK: vextractf128 + %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind 
readnone + + +define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { + ; CHECK: vextractf128 + %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { + ; CHECK: vextractf128 + %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { + ; CHECK: vinsertf128 + %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vperm2f128 + %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x 
double> %res +} +declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> 
@llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; 
<i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> 
%a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() { + ; CHECK: vzeroall + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() { + ; CHECK: vzeroupper + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + + diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll new file mode 100644 index 0000000000000..b1867105ce857 --- /dev/null +++ 
b/test/CodeGen/X86/avx-intrinsics-x86_64.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s + +define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + + diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll new file mode 100644 index 0000000000000..6190c3684ed62 --- /dev/null +++ b/test/CodeGen/X86/barrier-sse.ll @@ -0,0 +1,21 @@ 
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER + + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false) + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false) + ret void +} diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll new file mode 100644 index 0000000000000..fad6ef690c2f2 --- /dev/null +++ b/test/CodeGen/X86/barrier.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll index 255adfbb2bb4d..3857fb157905f 100644 --- a/test/CodeGen/X86/call-imm.ll +++ b/test/CodeGen/X86/call-imm.ll @@ -5,7 +5,7 @@ ; Call to immediate is not safe on x86-64 unless we *know* that the ; call will be within 32-bits pcrel from the dest immediate. -; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax} +; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax} ; PR3666 ; PR3773 diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll index 1f7f6ecafafb2..1f7f6ecafafb2 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll index cb638092ea1a3..cb638092ea1a3 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll index ae27383895ce4..ae27383895ce4 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll index 05388f9b2a96e..2a44463e5d32e 100644 --- a/test/CodeGen/X86/constant-pool-remat-0.ll +++ b/test/CodeGen/X86/constant-pool-remat-0.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 | grep LCPI | count 3 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI 
| count 3 -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 declare float @qux(float %y) diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index f29cbf323e376..96fef0fbfc611 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 +; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll index bdbaac05f118d..bf57e78f35d4b 100644 --- a/test/CodeGen/X86/dllexport.ll +++ b/test/CodeGen/X86/dllexport.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s ; PR2936 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define dllexport x86_fastcallcc i32 @foo() nounwind { entry: diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll index 1df092018dd83..e577611ebcf1b 100644 --- a/test/CodeGen/X86/dyn-stackalloc.ll +++ b/test/CodeGen/X86/dyn-stackalloc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7} -; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16} -; RUN: llc < %s -march=x86-64 | grep {\\-16} +; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7} +; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16} +; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16} define void @t() nounwind { A: diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll new file mode 100644 index 0000000000000..b303cd1f73681 --- /dev/null +++ b/test/CodeGen/X86/empty-functions.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s 
-mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP-NOT: movq %rsp, %rbp +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: movq %rsp, %rbp +; CHECK-FP-NEXT: Ltmp1: +; CHECK-FP: nop diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll index 23b45ebb8d8b4..9ded7e05dc465 100644 --- a/test/CodeGen/X86/fabs.ll +++ b/test/CodeGen/X86/fabs.ll @@ -1,7 +1,7 @@ ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf ; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \ ; RUN: count 2 -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: grep fabs\$ | count 3 declare float @fabsf(float) diff --git a/test/CodeGen/X86/fast-isel-atomic.ll b/test/CodeGen/X86/fast-isel-atomic.ll new file mode 100644 index 0000000000000..74c586846d96d --- /dev/null +++ b/test/CodeGen/X86/fast-isel-atomic.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -march=x86-64 +; rdar://8204072 +; PR7652 + +@sc = external global i8 +@uc = external global i8 + +declare i8 @llvm.atomic.load.and.i8.p0i8(i8* nocapture, i8) nounwind + +define void @test_fetch_and_op() nounwind { +entry: + %tmp40 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1] + store i8 %tmp40, i8* @sc + %tmp41 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1] + store i8 %tmp41, i8* @uc + ret void +} diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll new file mode 100644 index 0000000000000..4ab1bc61c7e2c --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | 
FileCheck %s +; rdar://8337108 + +; Fast-isel shouldn't try to look through the compare because it's in a +; different basic block, so its operands aren't necessarily exported +; for cross-block usage. + +; CHECK: movb %al, 7(%rsp) +; CHECK: callq {{_?}}bar +; CHECK: movb 7(%rsp), %al + +declare void @bar() + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %q = add i32 %a, 7 + %r = add i32 %b, 9 + %t = icmp ult i32 %q, %r + invoke void @bar() to label %next unwind label %unw +next: + br i1 %t, label %true, label %return +true: + call void @bar() + br label %return +return: + ret void +unw: + unreachable +} diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 1270ab78ab5f7..577dd7223a4d7 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test1: -; X32: movl (%ecx,%eax,4), %eax +; X32: movl (%eax,%ecx,4), %eax ; X32: ret ; X64: test1: @@ -23,7 +23,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test2: -; X32: movl (%eax,%ecx,4), %eax +; X32: movl (%edx,%ecx,4), %eax ; X32: ret ; X64: test2: diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll index 7759bb056892e..5c62c1880516e 100644 --- a/test/CodeGen/X86/fast-isel-shift-imm.ll +++ b/test/CodeGen/X86/fast-isel-shift-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax} +; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %e} ; PR3242 define void @foo(i32 %x, i32* %p) nounwind { diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll new file mode 100644 index 0000000000000..ffcbf8a908c8d --- /dev/null +++ b/test/CodeGen/X86/force-align-stack.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s +; Tests to make 
sure that we always align the stack out to the minimum needed - +; in this case 16-bytes. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.3" + +define void @a() nounwind ssp { +entry: +; CHECK: _a: +; CHECK: andl $-16, %esp + %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16 + call void @b(<16 x i8>* %z) nounwind + br label %return + +return: ; preds = %entry + ret void +} + +declare void @b(<16 x i8>*) diff --git a/test/Transforms/LoopStrengthReduce/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll index 1a695f35e3b06..1a695f35e3b06 100644 --- a/test/Transforms/LoopStrengthReduce/insert-positions.ll +++ b/test/CodeGen/X86/insert-positions.ll diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll new file mode 100644 index 0000000000000..45a9b0f15c674 --- /dev/null +++ b/test/CodeGen/X86/int-intrinsic.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @llvm.x86.int(i8) nounwind + +; CHECK: int3 +; CHECK: ret +define void @primitive_int3 () { +bb.entry: + call void @llvm.x86.int(i8 3) nounwind + ret void +} + +; CHECK: int $-128 +; CHECK: ret +define void @primitive_int128 () { +bb.entry: + call void @llvm.x86.int(i8 128) nounwind + ret void +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index 71685bb5b83a4..b0105ac533bdd 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3 ; 
MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll new file mode 100644 index 0000000000000..03468e2b3f4f2 --- /dev/null +++ b/test/CodeGen/X86/lock-inst-encoding.ll @@ -0,0 +1,22 @@ +; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: f0: +; CHECK: addq %rax, (%rdi) +; CHECK: # encoding: [0xf0,0x48,0x01,0x07] +; CHECK: ret +define void @f0(i64* %a0) { + %t0 = and i64 1, 1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +declare i32 @llvm.atomic.load.and.i32.p0i32(i32* nocapture, i32) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 6c0eb8c0df93d..6556fdeea8340 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC ; By starting the IV at -64 instead of 0, a cmp is eliminated, ; as the flags from the add can be used directly. 
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll new file mode 100644 index 0000000000000..4b7050bd507bf --- /dev/null +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu + +; The inner loop should require only one add (and no leas either). +; rdar://8100380 + +; CHECK: BB0_4: +; CHECK-NEXT: movb $0, flags(%rdx) +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: cmpq $8192, %rdx +; CHECK-NEXT: jl + +@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp, label %bb, label %bb21 + +bb: ; preds = %entry + br label %bb7 + +bb7: ; preds = %bb, %bb17 + %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2] + %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp9, label %bb10, label %bb17 + +bb10: ; preds = %bb7 + br label %bb11 + +bb11: ; preds = %bb10, %bb11 + %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2] + %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1] + store i8 0, i8* %tmp13, align 1 + %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2] + %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1] + br i1 %tmp15, label %bb11, label %bb16 + +bb16: ; preds = %bb11 + br label %bb17 + +bb17: ; preds = %bb16, %bb7 + %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2] + %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1] + br i1 %tmp19, label %bb7, label %bb20 + +bb20: ; preds = %bb17 + br label %bb21 + +bb21: ; preds = %bb20, %entry + ret void +} diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll new file mode 100644 index 0000000000000..932141d0448ed --- /dev/null +++ b/test/CodeGen/X86/lsr-normalization.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 1 +; rdar://8168938 + +; This 
testcase involves SCEV normalization with the exit value from +; one loop involved with the increment value for an addrec on another +; loop. The expression should be properly normalized and simplified, +; and require only a single division. + +%0 = type { %0*, %0* } + +@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1] +@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1] + +define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind { +bb: + %tmp = alloca %0, align 8 ; <%0*> [#uses=11] + %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind + %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3] + store %0* %tmp, %0** %tmp3 + %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1] + store %0* %tmp, %0** %tmp4 + %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2] + %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2] + %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1] + br i1 %tmp7, label %bb10, label %bb8 + +bb8: ; preds = %bb + %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp9 + br label %bb10 + +bb10: ; preds = %bb8, %bb + %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1] + call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind + %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3] + %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1] + br i1 %tmp13, label %bb14, label %bb16 + +bb14: ; preds = %bb10 + %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0)) + br label %bb35 + +bb16: ; preds = %bb16, %bb10 + %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1] + %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1] + %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2] + %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1] 
+ %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2] + br i1 %tmp21, label %bb23, label %bb16 + +bb23: ; preds = %bb16 + %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb25, %bb23 + %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1] + %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1] + %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2] + %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1] + %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2] + br i1 %tmp30, label %bb32, label %bb25 + +bb32: ; preds = %bb25 + %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1] + %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind + br label %bb35 + +bb35: ; preds = %bb32, %bb14 + %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2] + %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1] + br i1 %tmp37, label %bb44, label %bb38 + +bb38: ; preds = %bb38, %bb35 + %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2] + %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2] + %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1] + call void @_ZdlPv(i8* %tmp42) nounwind + %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1] + br i1 %tmp43, label %bb44, label %bb38 + +bb44: ; preds = %bb38, %bb35 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*) + +declare noalias i8* @_Znwm(i64) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +declare void @_ZdlPv(i8*) nounwind + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index b7e69b84bf845..d2ff58be1055b 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry ; And the one at %bb68, where we want to be sure to use superhero mode: -; CHECK: BB10_10: +; CHECK: BB10_9: ; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}} ; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} ; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}} @@ -484,7 +484,7 @@ bb5: ; preds = %bb3, %entry ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $-16, %r{{.*}} -; CHECK-NEXT: BB10_11: +; CHECK-NEXT: BB10_10: ; CHECK-NEXT: cmpq $15, %r{{.*}} ; CHECK-NEXT: jg diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll new file mode 100644 index 0000000000000..c9ed3e553a466 --- /dev/null +++ b/test/CodeGen/X86/lsr-static-addr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s + +; CHECK: xorl %eax, %eax +; CHECK: movsd .LCPI0_0(%rip), %xmm0 +; CHECK: align +; CHECK-NEXT: BB0_2: +; CHECK-NEXT: movsd A(,%rax,8) +; CHECK-NEXT: mulsd +; CHECK-NEXT: movsd +; CHECK-NEXT: incq %rax + +@A = external global [0 x double] + +define void @foo(i64 %n) nounwind { +entry: + %cmp5 = icmp sgt i64 %n, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: + %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06 + %tmp3 = load double* %arrayidx, align 8 + %mul = fmul double %tmp3, 2.300000e+00 + store double %mul, double* %arrayidx, align 8 + %inc = add nsw i64 %i.06, 1 + 
%exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll index ec8db501ef346..d605e4f14fe43 100644 --- a/test/CodeGen/X86/lsr-wrap.ll +++ b/test/CodeGen/X86/lsr-wrap.ll @@ -3,7 +3,7 @@ ; LSR would like to use a single IV for both of these, however it's ; not safe due to wraparound. -; CHECK: addb $-4, %r +; CHECK: addb $-4, % ; CHECK: decw % @g_19 = common global i32 0 ; <i32*> [#uses=2] diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll deleted file mode 100644 index 796ef7a29e498..0000000000000 --- a/test/CodeGen/X86/narrow_op-2.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s - - %struct.bf = type { i64, i16, i16, i32 } -@bfi = external global %struct.bf* - -define void @t1() nounwind ssp { -entry: - -; CHECK: andb $-2, 10( -; CHECK: andb $-3, 10( - - %0 = load %struct.bf** @bfi, align 8 - %1 = getelementptr %struct.bf* %0, i64 0, i32 1 - %2 = bitcast i16* %1 to i32* - %3 = load i32* %2, align 1 - %4 = and i32 %3, -65537 - store i32 %4, i32* %2, align 1 - %5 = load %struct.bf** @bfi, align 8 - %6 = getelementptr %struct.bf* %5, i64 0, i32 1 - %7 = bitcast i16* %6 to i32* - %8 = load i32* %7, align 1 - %9 = and i32 %8, -131073 - store i32 %9, i32* %7, align 1 - ret void -} diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index 9f9f92115c797..8bed62488070d 100644 --- a/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" -define i32 @foo(i32 %A, i32 %B, i32 %C) { +define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { entry: switch i32 %A, label %out [ i32 1, label %bb diff --git a/test/CodeGen/X86/pr7882.ll 
b/test/CodeGen/X86/pr7882.ll new file mode 100644 index 0000000000000..88404dbe125eb --- /dev/null +++ b/test/CodeGen/X86/pr7882.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \ +; RUN: | FileCheck %s +; make sure scheduler honors the flags clobber. PR 7882. + +define i32 @main(i32 %argc, i8** %argv) nounwind +{ +entry: +; CHECK: InlineAsm End +; CHECK: cmpl + %res = icmp slt i32 1, %argc + %tmp = call i32 asm sideeffect alignstack + "push $$0 + popf + mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res) + %ret = select i1 %res, i32 %tmp, i32 42 + ret i32 %ret +} diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll new file mode 100644 index 0000000000000..10d489b9a8a63 --- /dev/null +++ b/test/CodeGen/X86/shl-anyext.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Codegen should be able to use a 32-bit shift instead of a 64-bit shift. +; CHECK: shll $16 + +define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind { +if.end523: ; preds = %if.end453 + %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1] + %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1] + %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1] + %or801 = or i64 %shl800, %a ; <i64> [#uses=1] + %or806 = or i64 %or801, %b ; <i64> [#uses=1] + %or811 = or i64 %or806, %c ; <i64> [#uses=1] + %or819 = or i64 %or811, %d ; <i64> [#uses=1] + %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1] + store i32 %conv820, i32* %p + ret void +} + +; CHECK: foo: + +declare void @bar(i64) + +define fastcc void @foo(i32 %t) { +bb: + %tmp = add i32 %t, -1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2] + %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1] + %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1] + %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1] + %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1] + %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1] + %tmp8 
= and i64 %tmp7, 4294967288 ; <i64> [#uses=1] + %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1] + call void @bar(i64 %tmp9) + ret void +} diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 4b27f2edb759b..a3c9957be34e9 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 +; Darwin 8 generates stubs, which don't match +; XFAIL: apple-darwin8 define void @t1(i32 %x) nounwind ssp { entry: diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index ebcdc655eedaa..348121ac8bcfa 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll new file mode 100644 index 0000000000000..73f88aec643f3 --- /dev/null +++ b/test/CodeGen/X86/sse1.ll @@ -0,0 +1,45 @@ +; Tests for SSE1 and below, without SSE2+. 
+; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s + +define <8 x i16> @test1(<8 x i32> %a) nounwind { +; CHECK: test1 + ret <8 x i16> zeroinitializer +} + +define <8 x i16> @test2(<8 x i32> %a) nounwind { +; CHECK: test2 + %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1] + ret <8 x i16> %c +} + +; PR7993 +;define <4 x i32> @test3(<4 x i16> %a) nounwind { +; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1] +; ret <4 x i32> %c +;} + +; This should not emit shuffles to populate the top 2 elements of the 4-element +; vector that this ends up returning. +; rdar://8368414 +define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fsub float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; CHECK: test4: +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: unpcklps +; CHECK-NOT: shufps $16 +; CHECK: ret +} diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 20b8eac9c8d82..6fc019071f8bf 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -1,14 +1,14 @@ ; Tests for SSE2 and below, without SSE3+. 
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s -define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t1: +; CHECK: test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movlpd 12(%esp), %xmm0 @@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: ret } -define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t2: +; CHECK: test2: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 @@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } + + +define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind { + %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] + %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] + %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] + %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] + %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] + %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] + 
%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp13, <4 x float>* %res + ret void +; CHECK: @test3 +; CHECK: unpcklps +} + +define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { + %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp5, <4 x float>* %res + ret void +; CHECK: @test4 +; CHECK: pshufd $50, %xmm0, %xmm0 +} + +define <4 x i32> @test5(i8** %ptr) nounwind { +; CHECK: test5: +; CHECK: pxor +; CHECK: punpcklbw +; CHECK: punpcklwd + + %tmp = load i8** %ptr ; <i8*> [#uses=1] + %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] + %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] + %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] + %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> 
%tmp36 +} + +define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp2, <4 x float>* %res + ret void + +; CHECK: test6: +; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps %xmm0, (%eax) +} + +define void @test7() nounwind { + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] + store <4 x float> %2, <4 x float>* null + ret void + +; CHECK: test7: +; CHECK: pxor %xmm0, %xmm0 +; CHECK: movaps %xmm0, 0 +} + +@x = external global [4 x i32] + +define <2 x i64> @test8() nounwind { + %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] + %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] + %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] + %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] + %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] + %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp16 +; CHECK: test8: +; CHECK: movups (%eax), %xmm0 +} + +define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> 
[#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test9: +; CHECK: movups 8(%esp), %xmm0 +} + +define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test10: +; CHECK: movaps 4(%esp), %xmm0 +} + +define <2 x double> @test11(double %a, double %b) nounwind { + %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] + %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp7 +; CHECK: test11: +; CHECK: movapd 4(%esp), %xmm0 +} + +define void @test12() nounwind { + %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4, <4 x float>* null + ret void +; CHECK: test12: +; CHECK: movhlps +; CHECK: shufps +} + +define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] + %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp11, <4 x float>* 
%res + ret void +; CHECK: test13 +; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: pshufd $-40, %xmm0, %xmm0 +} + +define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] + %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] + %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp27 +; CHECK: test14: +; CHECK: addps %xmm1, %xmm0 +; CHECK: subps %xmm1, %xmm2 +; CHECK: movlhps %xmm2, %xmm0 +} + +define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind { +entry: + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp4 +; CHECK: test15: +; CHECK: movhlps %xmm1, %xmm0 +} diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index ef66d1a44a187..3a14fa26300c3 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +; This used to compile to insertps $0 + insertps $16. insertps $0 is always +; pointless. 
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fadd float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; X32: buildvector: +; X32-NOT: insertps $0 +; X32: insertps $16 +; X32-NOT: insertps $0 +; X32: ret +; X64: buildvector: +; X64-NOT: insertps $0 +; X64: insertps $16 +; X64-NOT: insertps $0 +; X64: ret +} + diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll deleted file mode 100644 index 001a54096408e..0000000000000 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ /dev/null @@ -1,361 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep asm-printer %t | grep 166 -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5 - - type { [62 x %struct.Bitvec*] } ; type %0 - type { i8* } ; type %1 - type { double } ; type %2 - %struct..5sPragmaType = type { i8*, i32 } - %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } - %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* } - %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } - %struct.AuxData = type { i8*, void (i8*)* } - %struct.Bitvec = type { i32, i32, i32, %0 } - %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } - %struct.BtLock = type { %struct.Btree*, i32, 
i8, %struct.BtLock* } - %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } - %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } - %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } - %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } - %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } - %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } - %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } - %struct.Context = type { i64, i32, %struct.Fifo } - %struct.CountCtx = type { i64 } - %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } - %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } - %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* } - %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } - %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } - %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } - %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } - %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } - %struct.FifoPage = type { i32, i32, i32, 
%struct.FifoPage*, [1 x i64] } - %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } - %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } - %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } - %struct.IdList = type { %struct..5sPragmaType*, i32, i32 } - %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } - %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } - %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } - %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* } - %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } - %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 } - %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] } - %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* } - %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* } - %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } - %struct.Select = type { %struct.ExprList*, 
i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } - %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } - %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } - %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } - %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } - %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } - %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } - %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } - %struct._OvflCell = type { i8*, i16 } - %struct._ht = type { i32, %struct.HashElem* } - %struct.sColMap = type { i32, i8* } - %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, 
i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } - %struct.sqlite3InitInfo = type { i32, i32, i8 } - %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } - %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } - %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } - %struct.sqlite3_index_constraint_usage = type { i32, i8 } - %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double } - %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } - %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, 
i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } - %struct.sqlite3_mutex = type opaque - %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } - %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } - %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } -@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp { -entry: - %0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18 ; <i8**> [#uses=1] - %1 = load i8** %0, align 8 ; <i8*> [#uses=34] - %2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12 ; <i16*> [#uses=1] - %3 = load i16* %2, align 2 ; <i16> [#uses=1] - %4 = zext i16 %3 to i32 ; <i32> [#uses=2] - %5 = shl i32 %idx, 1 ; <i32> [#uses=2] - %6 = add i32 %4, %5 ; <i32> [#uses=1] - %7 = sext i32 %6 to i64 ; <i64> [#uses=2] - %8 = getelementptr i8* %1, i64 %7 ; <i8*> [#uses=1] - %9 = load i8* %8, align 1 ; <i8> [#uses=2] - %10 = zext i8 %9 to i32 ; <i32> [#uses=1] - %11 = shl i32 %10, 8 ; <i32> [#uses=1] - %.sum3 = add i64 %7, 1 ; <i64> [#uses=1] - %12 = getelementptr i8* 
%1, i64 %.sum3 ; <i8*> [#uses=1] - %13 = load i8* %12, align 1 ; <i8> [#uses=2] - %14 = zext i8 %13 to i32 ; <i32> [#uses=1] - %15 = or i32 %11, %14 ; <i32> [#uses=3] - %16 = icmp slt i32 %sz, 4 ; <i1> [#uses=1] - %size_addr.0.i = select i1 %16, i32 4, i32 %sz ; <i32> [#uses=3] - %17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8 ; <i8*> [#uses=5] - %18 = load i8* %17, align 8 ; <i8> [#uses=1] - %19 = zext i8 %18 to i32 ; <i32> [#uses=4] - %20 = add i32 %19, 1 ; <i32> [#uses=2] - br label %bb3.i - -bb3.i: ; preds = %bb3.i, %entry - %addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ] ; <i32> [#uses=1] - %21 = sext i32 %addr.0.i to i64 ; <i64> [#uses=2] - %22 = getelementptr i8* %1, i64 %21 ; <i8*> [#uses=2] - %23 = load i8* %22, align 1 ; <i8> [#uses=2] - %24 = zext i8 %23 to i32 ; <i32> [#uses=1] - %25 = shl i32 %24, 8 ; <i32> [#uses=1] - %.sum34.i = add i64 %21, 1 ; <i64> [#uses=1] - %26 = getelementptr i8* %1, i64 %.sum34.i ; <i8*> [#uses=2] - %27 = load i8* %26, align 1 ; <i8> [#uses=2] - %28 = zext i8 %27 to i32 ; <i32> [#uses=1] - %29 = or i32 %25, %28 ; <i32> [#uses=3] - %.not.i = icmp uge i32 %29, %15 ; <i1> [#uses=1] - %30 = icmp eq i32 %29, 0 ; <i1> [#uses=1] - %or.cond.i = or i1 %30, %.not.i ; <i1> [#uses=1] - br i1 %or.cond.i, label %bb5.i, label %bb3.i - -bb5.i: ; preds = %bb3.i - store i8 %9, i8* %22, align 1 - store i8 %13, i8* %26, align 1 - %31 = zext i32 %15 to i64 ; <i64> [#uses=2] - %32 = getelementptr i8* %1, i64 %31 ; <i8*> [#uses=1] - store i8 %23, i8* %32, align 1 - %.sum32.i = add i64 %31, 1 ; <i64> [#uses=1] - %33 = getelementptr i8* %1, i64 %.sum32.i ; <i8*> [#uses=1] - store i8 %27, i8* %33, align 1 - %34 = add i32 %15, 2 ; <i32> [#uses=1] - %35 = zext i32 %34 to i64 ; <i64> [#uses=2] - %36 = getelementptr i8* %1, i64 %35 ; <i8*> [#uses=1] - %37 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1] - %38 = trunc i32 %37 to i8 ; <i8> [#uses=1] - store i8 %38, i8* %36, align 1 - %39 = trunc i32 %size_addr.0.i to i8 ; <i8> [#uses=1] - 
%.sum31.i = add i64 %35, 1 ; <i64> [#uses=1] - %40 = getelementptr i8* %1, i64 %.sum31.i ; <i8*> [#uses=1] - store i8 %39, i8* %40, align 1 - %41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14 ; <i16*> [#uses=4] - %42 = load i16* %41, align 2 ; <i16> [#uses=1] - %43 = trunc i32 %size_addr.0.i to i16 ; <i16> [#uses=1] - %44 = add i16 %42, %43 ; <i16> [#uses=1] - store i16 %44, i16* %41, align 2 - %45 = load i8* %17, align 8 ; <i8> [#uses=1] - %46 = zext i8 %45 to i32 ; <i32> [#uses=1] - %47 = add i32 %46, 1 ; <i32> [#uses=1] - br label %bb11.outer.i - -bb11.outer.i: ; preds = %bb6.i, %bb5.i - %addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ] ; <i32> [#uses=1] - %48 = sext i32 %addr.1.ph.i to i64 ; <i64> [#uses=2] - %49 = getelementptr i8* %1, i64 %48 ; <i8*> [#uses=1] - %.sum30.i = add i64 %48, 1 ; <i64> [#uses=1] - %50 = getelementptr i8* %1, i64 %.sum30.i ; <i8*> [#uses=1] - br label %bb11.i - -bb6.i: ; preds = %bb11.i - %51 = zext i32 %111 to i64 ; <i64> [#uses=2] - %52 = getelementptr i8* %1, i64 %51 ; <i8*> [#uses=2] - %53 = load i8* %52, align 1 ; <i8> [#uses=1] - %54 = zext i8 %53 to i32 ; <i32> [#uses=1] - %55 = shl i32 %54, 8 ; <i32> [#uses=1] - %.sum24.i = add i64 %51, 1 ; <i64> [#uses=1] - %56 = getelementptr i8* %1, i64 %.sum24.i ; <i8*> [#uses=2] - %57 = load i8* %56, align 1 ; <i8> [#uses=3] - %58 = zext i8 %57 to i32 ; <i32> [#uses=1] - %59 = or i32 %55, %58 ; <i32> [#uses=5] - %60 = add i32 %111, 2 ; <i32> [#uses=1] - %61 = zext i32 %60 to i64 ; <i64> [#uses=2] - %62 = getelementptr i8* %1, i64 %61 ; <i8*> [#uses=2] - %63 = load i8* %62, align 1 ; <i8> [#uses=1] - %64 = zext i8 %63 to i32 ; <i32> [#uses=1] - %65 = shl i32 %64, 8 ; <i32> [#uses=1] - %.sum23.i = add i64 %61, 1 ; <i64> [#uses=1] - %66 = getelementptr i8* %1, i64 %.sum23.i ; <i8*> [#uses=2] - %67 = load i8* %66, align 1 ; <i8> [#uses=2] - %68 = zext i8 %67 to i32 ; <i32> [#uses=1] - %69 = or i32 %65, %68 ; <i32> [#uses=1] - %70 = add i32 %111, 3 ; <i32> [#uses=1] - %71 = 
add i32 %70, %69 ; <i32> [#uses=1] - %72 = icmp sge i32 %71, %59 ; <i1> [#uses=1] - %73 = icmp ne i32 %59, 0 ; <i1> [#uses=1] - %74 = and i1 %72, %73 ; <i1> [#uses=1] - br i1 %74, label %bb9.i, label %bb11.outer.i - -bb9.i: ; preds = %bb6.i - %75 = load i8* %17, align 8 ; <i8> [#uses=1] - %76 = zext i8 %75 to i32 ; <i32> [#uses=1] - %77 = add i32 %76, 7 ; <i32> [#uses=1] - %78 = zext i32 %77 to i64 ; <i64> [#uses=1] - %79 = getelementptr i8* %1, i64 %78 ; <i8*> [#uses=2] - %80 = load i8* %79, align 1 ; <i8> [#uses=1] - %81 = sub i8 %109, %57 ; <i8> [#uses=1] - %82 = add i8 %81, %67 ; <i8> [#uses=1] - %83 = add i8 %82, %80 ; <i8> [#uses=1] - store i8 %83, i8* %79, align 1 - %84 = zext i32 %59 to i64 ; <i64> [#uses=2] - %85 = getelementptr i8* %1, i64 %84 ; <i8*> [#uses=1] - %86 = load i8* %85, align 1 ; <i8> [#uses=1] - store i8 %86, i8* %52, align 1 - %.sum22.i = add i64 %84, 1 ; <i64> [#uses=1] - %87 = getelementptr i8* %1, i64 %.sum22.i ; <i8*> [#uses=1] - %88 = load i8* %87, align 1 ; <i8> [#uses=1] - store i8 %88, i8* %56, align 1 - %89 = add i32 %59, 2 ; <i32> [#uses=1] - %90 = zext i32 %89 to i64 ; <i64> [#uses=2] - %91 = getelementptr i8* %1, i64 %90 ; <i8*> [#uses=1] - %92 = load i8* %91, align 1 ; <i8> [#uses=1] - %93 = zext i8 %92 to i32 ; <i32> [#uses=1] - %94 = shl i32 %93, 8 ; <i32> [#uses=1] - %.sum20.i = add i64 %90, 1 ; <i64> [#uses=1] - %95 = getelementptr i8* %1, i64 %.sum20.i ; <i8*> [#uses=2] - %96 = load i8* %95, align 1 ; <i8> [#uses=1] - %97 = zext i8 %96 to i32 ; <i32> [#uses=1] - %98 = or i32 %94, %97 ; <i32> [#uses=1] - %99 = sub i32 %59, %111 ; <i32> [#uses=1] - %100 = add i32 %99, %98 ; <i32> [#uses=1] - %101 = lshr i32 %100, 8 ; <i32> [#uses=1] - %102 = trunc i32 %101 to i8 ; <i8> [#uses=1] - store i8 %102, i8* %62, align 1 - %103 = load i8* %95, align 1 ; <i8> [#uses=1] - %104 = sub i8 %57, %109 ; <i8> [#uses=1] - %105 = add i8 %104, %103 ; <i8> [#uses=1] - store i8 %105, i8* %66, align 1 - br label %bb11.i - -bb11.i: ; preds = %bb9.i, 
%bb11.outer.i - %106 = load i8* %49, align 1 ; <i8> [#uses=1] - %107 = zext i8 %106 to i32 ; <i32> [#uses=1] - %108 = shl i32 %107, 8 ; <i32> [#uses=1] - %109 = load i8* %50, align 1 ; <i8> [#uses=3] - %110 = zext i8 %109 to i32 ; <i32> [#uses=1] - %111 = or i32 %108, %110 ; <i32> [#uses=6] - %112 = icmp eq i32 %111, 0 ; <i1> [#uses=1] - br i1 %112, label %bb12.i, label %bb6.i - -bb12.i: ; preds = %bb11.i - %113 = zext i32 %20 to i64 ; <i64> [#uses=2] - %114 = getelementptr i8* %1, i64 %113 ; <i8*> [#uses=2] - %115 = load i8* %114, align 1 ; <i8> [#uses=2] - %116 = add i32 %19, 5 ; <i32> [#uses=1] - %117 = zext i32 %116 to i64 ; <i64> [#uses=2] - %118 = getelementptr i8* %1, i64 %117 ; <i8*> [#uses=3] - %119 = load i8* %118, align 1 ; <i8> [#uses=1] - %120 = icmp eq i8 %115, %119 ; <i1> [#uses=1] - br i1 %120, label %bb13.i, label %bb1.preheader - -bb13.i: ; preds = %bb12.i - %121 = add i32 %19, 2 ; <i32> [#uses=1] - %122 = zext i32 %121 to i64 ; <i64> [#uses=1] - %123 = getelementptr i8* %1, i64 %122 ; <i8*> [#uses=1] - %124 = load i8* %123, align 1 ; <i8> [#uses=1] - %125 = add i32 %19, 6 ; <i32> [#uses=1] - %126 = zext i32 %125 to i64 ; <i64> [#uses=1] - %127 = getelementptr i8* %1, i64 %126 ; <i8*> [#uses=1] - %128 = load i8* %127, align 1 ; <i8> [#uses=1] - %129 = icmp eq i8 %124, %128 ; <i1> [#uses=1] - br i1 %129, label %bb14.i, label %bb1.preheader - -bb14.i: ; preds = %bb13.i - %130 = zext i8 %115 to i32 ; <i32> [#uses=1] - %131 = shl i32 %130, 8 ; <i32> [#uses=1] - %.sum29.i = add i64 %113, 1 ; <i64> [#uses=1] - %132 = getelementptr i8* %1, i64 %.sum29.i ; <i8*> [#uses=1] - %133 = load i8* %132, align 1 ; <i8> [#uses=1] - %134 = zext i8 %133 to i32 ; <i32> [#uses=1] - %135 = or i32 %134, %131 ; <i32> [#uses=2] - %136 = zext i32 %135 to i64 ; <i64> [#uses=1] - %137 = getelementptr i8* %1, i64 %136 ; <i8*> [#uses=1] - %138 = bitcast i8* %137 to i16* ; <i16*> [#uses=1] - %139 = bitcast i8* %114 to i16* ; <i16*> [#uses=1] - %tmp.i = load i16* %138, align 1 ; 
<i16> [#uses=1] - store i16 %tmp.i, i16* %139, align 1 - %140 = load i8* %118, align 1 ; <i8> [#uses=1] - %141 = zext i8 %140 to i32 ; <i32> [#uses=1] - %142 = shl i32 %141, 8 ; <i32> [#uses=1] - %.sum28.i = add i64 %117, 1 ; <i64> [#uses=1] - %143 = getelementptr i8* %1, i64 %.sum28.i ; <i8*> [#uses=2] - %144 = load i8* %143, align 1 ; <i8> [#uses=2] - %145 = zext i8 %144 to i32 ; <i32> [#uses=1] - %146 = or i32 %142, %145 ; <i32> [#uses=1] - %147 = add i32 %135, 2 ; <i32> [#uses=1] - %148 = zext i32 %147 to i64 ; <i64> [#uses=2] - %149 = getelementptr i8* %1, i64 %148 ; <i8*> [#uses=1] - %150 = load i8* %149, align 1 ; <i8> [#uses=1] - %151 = zext i8 %150 to i32 ; <i32> [#uses=1] - %152 = shl i32 %151, 8 ; <i32> [#uses=1] - %.sum27.i = add i64 %148, 1 ; <i64> [#uses=1] - %153 = getelementptr i8* %1, i64 %.sum27.i ; <i8*> [#uses=2] - %154 = load i8* %153, align 1 ; <i8> [#uses=1] - %155 = zext i8 %154 to i32 ; <i32> [#uses=1] - %156 = or i32 %152, %155 ; <i32> [#uses=1] - %157 = add i32 %156, %146 ; <i32> [#uses=1] - %158 = lshr i32 %157, 8 ; <i32> [#uses=1] - %159 = trunc i32 %158 to i8 ; <i8> [#uses=1] - store i8 %159, i8* %118, align 1 - %160 = load i8* %153, align 1 ; <i8> [#uses=1] - %161 = add i8 %160, %144 ; <i8> [#uses=1] - store i8 %161, i8* %143, align 1 - br label %bb1.preheader - -bb1.preheader: ; preds = %bb14.i, %bb13.i, %bb12.i - %i.08 = add i32 %idx, 1 ; <i32> [#uses=2] - %162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15 ; <i16*> [#uses=4] - %163 = load i16* %162, align 4 ; <i16> [#uses=2] - %164 = zext i16 %163 to i32 ; <i32> [#uses=1] - %165 = icmp sgt i32 %164, %i.08 ; <i1> [#uses=1] - br i1 %165, label %bb, label %bb2 - -bb: ; preds = %bb, %bb1.preheader - %indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ] ; <i64> [#uses=3] - %tmp16 = add i32 %5, %4 ; <i32> [#uses=1] - %tmp.17 = sext i32 %tmp16 to i64 ; <i64> [#uses=1] - %tmp19 = shl i64 %indvar, 1 ; <i64> [#uses=1] - %ctg2.sum = add i64 %tmp.17, %tmp19 ; <i64> [#uses=4] - 
%ctg229 = getelementptr i8* %1, i64 %ctg2.sum ; <i8*> [#uses=1] - %ctg229.sum31 = add i64 %ctg2.sum, 2 ; <i64> [#uses=1] - %166 = getelementptr i8* %1, i64 %ctg229.sum31 ; <i8*> [#uses=1] - %167 = load i8* %166, align 1 ; <i8> [#uses=1] - store i8 %167, i8* %ctg229 - %ctg229.sum30 = add i64 %ctg2.sum, 3 ; <i64> [#uses=1] - %168 = getelementptr i8* %1, i64 %ctg229.sum30 ; <i8*> [#uses=1] - %169 = load i8* %168, align 1 ; <i8> [#uses=1] - %ctg229.sum = add i64 %ctg2.sum, 1 ; <i64> [#uses=1] - %170 = getelementptr i8* %1, i64 %ctg229.sum ; <i8*> [#uses=1] - store i8 %169, i8* %170, align 1 - %indvar15 = trunc i64 %indvar to i32 ; <i32> [#uses=1] - %i.09 = add i32 %indvar15, %i.08 ; <i32> [#uses=1] - %i.0 = add i32 %i.09, 1 ; <i32> [#uses=1] - %171 = load i16* %162, align 4 ; <i16> [#uses=2] - %172 = zext i16 %171 to i32 ; <i32> [#uses=1] - %173 = icmp sgt i32 %172, %i.0 ; <i1> [#uses=1] - %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] - br i1 %173, label %bb, label %bb2 - -bb2: ; preds = %bb, %bb1.preheader - %174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ] ; <i16> [#uses=1] - %175 = add i16 %174, -1 ; <i16> [#uses=2] - store i16 %175, i16* %162, align 4 - %176 = load i8* %17, align 8 ; <i8> [#uses=1] - %177 = zext i8 %176 to i32 ; <i32> [#uses=1] - %178 = add i32 %177, 3 ; <i32> [#uses=1] - %179 = zext i32 %178 to i64 ; <i64> [#uses=1] - %180 = getelementptr i8* %1, i64 %179 ; <i8*> [#uses=1] - %181 = lshr i16 %175, 8 ; <i16> [#uses=1] - %182 = trunc i16 %181 to i8 ; <i8> [#uses=1] - store i8 %182, i8* %180, align 1 - %183 = load i8* %17, align 8 ; <i8> [#uses=1] - %184 = zext i8 %183 to i32 ; <i32> [#uses=1] - %185 = add i32 %184, 3 ; <i32> [#uses=1] - %186 = zext i32 %185 to i64 ; <i64> [#uses=1] - %187 = load i16* %162, align 4 ; <i16> [#uses=1] - %188 = trunc i16 %187 to i8 ; <i8> [#uses=1] - %.sum = add i64 %186, 1 ; <i64> [#uses=1] - %189 = getelementptr i8* %1, i64 %.sum ; <i8*> [#uses=1] - store i8 %188, i8* %189, align 1 - %190 = load i16* %41, 
align 2 ; <i16> [#uses=1] - %191 = add i16 %190, 2 ; <i16> [#uses=1] - store i16 %191, i16* %41, align 2 - %192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1] - store i8 1, i8* %192, align 1 - ret void -} diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll index 70204bcf47455..a7c2517e7dbe7 100644 --- a/test/CodeGen/X86/stdcall.ll +++ b/test/CodeGen/X86/stdcall.ll @@ -2,7 +2,7 @@ ; PR5851 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" %0 = type { void (...)* } diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index 5682e7caf8bd5..abc5174c98de7 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -1,6 +1,6 @@ ; rdar://7860110 -; RUN: llc < %s | FileCheck %s -check-prefix=X64 -; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32 +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64 +; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.2" @@ -125,3 +125,30 @@ entry: ; X32: movb %cl, 5(%{{.*}}) } +; PR7833 + +@g_16 = internal global i32 -1 + +; X64: test8: +; X64-NEXT: movl _g_16(%rip), %eax +; X64-NEXT: movl $0, _g_16(%rip) +; X64-NEXT: orl $1, %eax +; X64-NEXT: movl %eax, _g_16(%rip) +; X64-NEXT: ret +define void @test8() nounwind { + %tmp = load i32* @g_16 + store i32 0, i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} + +; X64: test9: +; X64-NEXT: orb $1, _g_16(%rip) +; X64-NEXT: ret +define void @test9() nounwind { + %tmp = load i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} 
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll index d54fb4115b078..7f92af4dca9f3 100644 --- a/test/CodeGen/X86/tailcall-fastisel.ll +++ b/test/CodeGen/X86/tailcall-fastisel.ll @@ -1,8 +1,6 @@ -; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL +; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL -; Fast-isel shouldn't attempt to handle this tail call, and it should -; cleanly terminate instruction selection in the block after it's -; done to avoid emitting invalid MachineInstrs. +; Fast-isel shouldn't attempt to cope with tail calls. %0 = type { i64, i32, i8* } @@ -11,3 +9,11 @@ fail: ; preds = %entry %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1] ret i8* %tmp20 } + +define i32 @foo() nounwind { +entry: + %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @bar(...) nounwind diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index 4c37225ce027b..6f6d6f2cd9671 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -3,7 +3,7 @@ @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] -define i32 @main() nounwind { +define i32 @foo() nounwind { bb1.thread: br label %bb1 diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll index 9c4b773a61909..76c3fdfc060cf 100644 --- a/test/CodeGen/X86/v2f32.ll +++ b/test/CodeGen/X86/v2f32.ll @@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind { store float %c, float* %P2 ret void ; X64: test1: -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) ; X64-NEXT: ret ; X32: test1: -; X32-NEXT: movss 4(%esp), %xmm0 -; X32-NEXT: addss 8(%esp), %xmm0 -; X32-NEXT: movl 12(%esp), %eax -; X32-NEXT: movss %xmm0, (%eax) +; 
X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) ; X32-NEXT: ret } @@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw ret <2 x float> %Z ; X64: test2: -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: addps -; X64-NEXT: movaps -; X64-NEXT: pshufd +; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: ret } + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; CHECK: test3: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; CHECK: test4: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x float> %E + +; CHECK: _test5: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + + diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll index 6f18d13cc9d3c..f8531646effa3 100644 --- a/test/CodeGen/X86/vec_cast.ll +++ b/test/CodeGen/X86/vec_cast.ll @@ -1,15 +1,16 @@ -; RUN: llc < %s -march=x86-64 -; RUN: llc < %s -march=x86-64 -disable-mmx +; RUN: llc < %s -march=x86-64 -mcpu=core2 +; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx + define <8 x i32> @a(<8 x i16> %a) nounwind { %c = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %c } -define <3 x i32> @b(<3 x i16> %a) nounwind { - %c = sext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x 
i32> @b(<3 x i16> %a) nounwind { +; %c = sext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @c(<1 x i16> %a) nounwind { %c = sext <1 x i16> %a to <1 x i32> @@ -21,10 +22,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwind { ret <8 x i32> %c } -define <3 x i32> @e(<3 x i16> %a) nounwind { - %c = zext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x i32> @e(<3 x i16> %a) nounwind { +; %c = zext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @f(<1 x i16> %a) nounwind { %c = zext <1 x i16> %a to <1 x i32> diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll index 54aa43f0c35dc..de3b36ff126c2 100644 --- a/test/CodeGen/X86/vec_insert-6.ll +++ b/test/CodeGen/X86/vec_insert-6.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 define <4 x float> @t3(<4 x float>* %P) nounwind { %tmp1 = load <4 x float>* %P diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll index 2e829df1f8dfd..e5a7ccc5ef94f 100644 --- a/test/CodeGen/X86/vec_insert-9.ll +++ b/test/CodeGen/X86/vec_insert-9.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t -; RUN: grep pinsrd %t | count 2 +; RUN: grep pinsrd %t | count 1 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll new file mode 100644 index 0000000000000..9ef7fbdb0c504 --- /dev/null +++ b/test/CodeGen/X86/vec_shift4.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s + +define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shll +; CHECK: pslld +; CHECK: paddd +; CHECK: cvttps2dq +; 
CHECK: pmulld + + %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1] + %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} + +define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shlb +; CHECK: pblendvb +; CHECK: pblendvb +; CHECK: pblendvb + %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1] + %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll deleted file mode 100644 index a63e3868ad75f..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-10.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep unpcklps %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: not grep {sub.*esp} %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { - %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] - %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] - %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] - %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] - %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] - %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] - %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp13, <4 x float>* %res - ret void -} - -define void @test2(<4 x float> %X, <4 x float>* %res) { - %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp5, <4 x float>* %res - ret void -} diff --git 
a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll index 9fc09dfdd2b89..861a1cc5b93cf 100644 --- a/test/CodeGen/X86/vec_shuffle-19.ll +++ b/test/CodeGen/X86/vec_shuffle-19.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 ; PR2485 define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll index 6d1bac0743d49..fc06b9514e436 100644 --- a/test/CodeGen/X86/vec_shuffle-20.ll +++ b/test/CodeGen/X86/vec_shuffle-20.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll index 7562f1d89594e..1b104deb30556 100644 --- a/test/CodeGen/X86/vec_shuffle-24.ll +++ b/test/CodeGen/X86/vec_shuffle-24.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s define i32 @t() nounwind optsize { entry: +; CHECK: punpckldq %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5] volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll deleted file mode 100644 index f4930b084504d..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movlhps %t | count 1 -; RUN: grep movhlps %t | count 1 
- -define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) { - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] - %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] - %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp27 -} - -define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) { -entry: - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] - %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] - %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp4 -} diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll new file mode 100644 index 0000000000000..1ed858de64e86 --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { +entry: +; CHECK: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movlps (%rax), %xmm1 +; CHECK-NEXT: shufps $36, %xmm1, %xmm0 + %0 = load <4 x i32>* undef, align 16 + %1 = load <4 x i32>* %a0, align 16 + %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %2 +} + diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll deleted file mode 100644 index 829fedf97cc55..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-4.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep shuf %t | count 2 -; RUN: not grep unpck %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) { - %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] - %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] - %tmp11 = 
shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp11, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll deleted file mode 100644 index c24167a6150d9..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-5.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movhlps %t | count 1 -; RUN: grep shufps %t | count 1 - -define void @test() nounwind { - %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp4, <4 x float>* null - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll deleted file mode 100644 index 28fd59b29dd36..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-6.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movapd %t | count 1 -; RUN: grep movaps %t | count 1 -; RUN: grep movups %t | count 2 - -target triple = "i686-apple-darwin" -@x = external global [4 x i32] - -define <2 x i64> @test1() { - %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] - %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] - %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] - %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] - %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] - %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, 
i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] - %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] - %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] - %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] - ret <2 x i64> %tmp16 -} - -define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <4 x float> @test3(float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <2 x double> @test4(double %a, double %b) { - %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] - %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] - ret <2 x double> %tmp7 -} diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll deleted file mode 100644 index 64bd6a3c83b82..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-7.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep pxor %t | count 1 -; RUN: not grep shufps %t - -define void @test() { - bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] - shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] - 
store <4 x float> %2, <4 x float>* null - unreachable -} - diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll deleted file mode 100644 index 964ce7b2892be..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-8.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | \ -; RUN: not grep shufps - -define void @test(<4 x float>* %res, <4 x float>* %A) { - %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp2, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll deleted file mode 100644 index 07195869b8cf9..0000000000000 --- a/test/CodeGen/X86/vec_shuffle-9.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s - -define <4 x i32> @test(i8** %ptr) { -; CHECK: pxor -; CHECK: punpcklbw -; CHECK: punpcklwd - - %tmp = load i8** %ptr ; <i8*> [#uses=1] - %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] - %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] - %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] - %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] - %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] - %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] - %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] - %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp.upgrd.4 = shufflevector <8 x i16> 
zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] - %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp36 -} diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 25dde57c767ed..463f522a11dff 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -3,7 +3,8 @@ ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 @@ -15,7 +16,8 @@ entry: ; widening shuffle v3float with a different mask and then a add define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf2: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> %val = fadd <3 x float> %x, %src2 @@ -26,7 +28,7 @@ entry: ; Example of when widening a v3float operation causes the DAG to replace a node ; with the operation that we are currently widening, i.e. when replacing ; opA with opB, the DAG will produce new operations with opA. 
-define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5> diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll new file mode 100644 index 0000000000000..27d3358d4ac1b --- /dev/null +++ b/test/CodeGen/X86/win_chkstk.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 +; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX + +; Windows and mingw require a prologue helper routine if more than 4096 bytes area +; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca +; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer. +; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit +; prologue is responsible for adjusting the stack pointer. + +; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI. +define i32 @main4k() nounwind { +entry: +; WIN_X32: call __chkstk +; WIN_X64: call __chkstk +; MINGW_X32: call __alloca +; MINGW_X64: call _alloca +; LINUX-NOT: call __chkstk + %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] + ret i32 0 +} + +; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack +; allocation. 
+define i32 @main128() nounwind { +entry: +; WIN_X32: # BB#0: +; WIN_X32-NOT: call __chkstk +; WIN_X32: ret + +; WIN_X64: # BB#0: +; WIN_X64-NOT: call __chkstk +; WIN_X64: ret + +; MINGW_X64: # BB#0: +; MINGW_X64-NOT: call _alloca +; MINGW_X64: ret + +; LINUX: # BB#0: +; LINUX-NOT: call __chkstk +; LINUX: ret + %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0] + ret i32 0 +} diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll index 3e3bb95d06f77..447007439fbbd 100644 --- a/test/CodeGen/X86/zero-remat.ll +++ b/test/CodeGen/X86/zero-remat.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 declare void @bar(double %x) diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll new file mode 100644 index 0000000000000..87a4a8955a3e9 --- /dev/null +++ b/test/DebugInfo/2010-07-19-Crash.ll @@ -0,0 +1,24 @@ +; RUN: llc -o /dev/null < %s +; PR7662 +; Do not add variables to !11 because it is a declaration entry. 
+ +define i32 @bar() nounwind readnone ssp { +entry: + ret i32 42, !dbg !9 +} + +!llvm.dbg.sp = !{!0, !6, !11} +!llvm.dbg.lv.foo = !{!7} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"one.c", metadata !".", metadata !"clang 2.8", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 524299, metadata !6, i32 7, i32 18} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 4, i32 3, metadata !10, null} +!10 = metadata !{i32 524299, metadata !0, i32 3, i32 11} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/DebugInfo/2010-08-04-StackVariable.ll new file mode 100644 index 0000000000000..61cd20bb1ab3d --- /dev/null +++ b/test/DebugInfo/2010-08-04-StackVariable.ll @@ -0,0 +1,124 @@ +; 
RUN: llc -O0 < %s | grep DW_OP_fbreg +; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. + +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() nounwind ssp { +entry: + %0 = alloca 
%struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", 
metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 
524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 
0, metadata !1} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null} diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll index e19395b0df1f4..3193791974775 100644 --- a/test/DebugInfo/printdbginfo2.ll +++ b/test/DebugInfo/printdbginfo2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-dbginfo -disable-output | FileCheck %s +; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s ; grep {%b is variable b of type x declared at x.c:7} %t1 ; grep {%2 is variable b of type x declared at x.c:7} %t1 ; grep {@c.1442 is variable c of type int declared at x.c:4} %t1 diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll index 02a79f81cdf16..0c6bcd9abfe52 100644 --- a/test/Feature/NamedMDNode.ll +++ b/test/Feature/NamedMDNode.ll @@ -3,7 +3,7 @@ ;; Simple NamedMDNode !0 = metadata !{i32 42} !1 = metadata !{metadata !"foo"} -!llvm.stuff = !{!0, !1, null} +!llvm.stuff = !{!0, !1} !samename = !{!0, !1} declare void @samename() diff --git a/test/Feature/linker_private_linkages.ll 
b/test/Feature/linker_private_linkages.ll index 19bcbb40aa017..f9f2908756451 100644 --- a/test/Feature/linker_private_linkages.ll +++ b/test/Feature/linker_private_linkages.ll @@ -4,3 +4,4 @@ @foo = linker_private hidden global i32 0 @bar = linker_private_weak hidden global i32 0 +@qux = linker_private_weak_def_auto global i32 0 diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll index d43815be46aa5..9856b375495ce 100644 --- a/test/Feature/metadata.ll +++ b/test/Feature/metadata.ll @@ -1,9 +1,11 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis ; PR7105 -define void @foo() { +define void @foo(i32 %x) { call void @llvm.zonk(metadata !1, i64 0, metadata !1) - ret void + store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{metadata !"hello"} + store i32 0, i32* null, !whatever !{i32 %x, metadata !"hello", metadata !1, metadata !{}, metadata !2} + ret void, !whatever !{i32 %x} } declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone diff --git a/test/Feature/unions.ll b/test/Feature/unions.ll deleted file mode 100644 index 3cf8c3ce0e979..0000000000000 --- a/test/Feature/unions.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis > %t1.ll -; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll -; RUN: diff %t1.ll %t2.ll - -%union.anon = type union { i8, i32, float } - -@union1 = constant union { i32, i8 } { i32 4 } -@union2 = constant union { i32, i8 } insertvalue(union { i32, i8 } undef, i32 4, 0) -@union3 = common global %union.anon zeroinitializer, align 8 - -define void @"Unions" () { - ret void -} - diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp index a8eda77e4a72d..e1cc81f40f79d 100644 --- a/test/FrontendC++/2009-07-15-LineNumbers.cpp +++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp @@ -1,7 +1,7 @@ // This is a regression test on debug info to make sure that we can // print line numbers in asm. 
// RUN: %llvmgcc -S -O0 -g %s -o - | \ -// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep { 2009-07-15-LineNumbers.cpp:25$} +// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {2009-07-15-LineNumbers.cpp:25$} #include <stdlib.h> diff --git a/test/FrontendC++/2010-07-19-nowarn.cpp b/test/FrontendC++/2010-07-19-nowarn.cpp new file mode 100644 index 0000000000000..8742bf152329a --- /dev/null +++ b/test/FrontendC++/2010-07-19-nowarn.cpp @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null +// This should not warn about unreferenced label. 8195660. +// XFAIL: * +// XTARGET: x86,i386,i686 + +void quarterAsm(int array[], int len) +{ + __asm + { + mov esi, array; + mov ecx, len; + shr ecx, 2; +loop: + movdqa xmm0, [esi]; + psrad xmm0, 2; + movdqa [esi], xmm0; + add esi, 16; + sub ecx, 1; + jnz loop; + } +} diff --git a/test/FrontendC++/2010-07-23-DeclLoc.cpp b/test/FrontendC++/2010-07-23-DeclLoc.cpp new file mode 100644 index 0000000000000..c72de3b336239 --- /dev/null +++ b/test/FrontendC++/2010-07-23-DeclLoc.cpp @@ -0,0 +1,86 @@ +// RUN: %llvmgxx -emit-llvm -S -g %s -o - | FileCheck %s +// Require the template function declaration refer to the correct filename. 
+// First, locate the function decl in metadata, and pluck out the file handle: +// CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]], +// Second: Require that filehandle refer to the correct filename: +// CHECK: {{^!}}[[filehandle]] = metadata {{![{].*}} metadata !"decl_should_be_here.hpp", +typedef long unsigned int __darwin_size_t; +typedef __darwin_size_t size_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +namespace std { + template<typename _Tp> class auto_ptr { + _Tp* _M_ptr; + public: + typedef _Tp element_type; + auto_ptr(element_type* __p = 0) throw() : _M_ptr(__p) { } + element_type& operator*() const throw() { } + }; +} +class Pointer32 { +public: + typedef uint32_t ptr_t; + typedef uint32_t size_t; +}; +class Pointer64 { +public: + typedef uint64_t ptr_t; + typedef uint64_t size_t; +}; +class BigEndian {}; +class LittleEndian {}; +template <typename _SIZE, typename _ENDIANNESS> class SizeAndEndianness { +public: + typedef _SIZE SIZE; +}; +typedef SizeAndEndianness<Pointer32, LittleEndian> ISA32Little; +typedef SizeAndEndianness<Pointer32, BigEndian> ISA32Big; +typedef SizeAndEndianness<Pointer64, LittleEndian> ISA64Little; +typedef SizeAndEndianness<Pointer64, BigEndian> ISA64Big; +template <typename SIZE> class TRange { +protected: + typename SIZE::ptr_t _location; + typename SIZE::size_t _length; + TRange(typename SIZE::ptr_t location, typename SIZE::size_t length) : _location(location), _length(length) { } +}; +template <typename SIZE, typename T> class TRangeValue : public TRange<SIZE> { + T _value; +public: + TRangeValue(typename SIZE::ptr_t location, typename SIZE::size_t length, T value) : TRange<SIZE>(location, length), _value(value) {}; +}; +template <typename SIZE> class TAddressRelocator {}; +class CSCppSymbolOwner{}; +class CSCppSymbolOwnerData{}; +template <typename SIZE> class 
TRawSymbolOwnerData +{ + TRangeValue< SIZE, uint8_t* > _TEXT_text_section; + const char* _dsym_path; + uint32_t _dylib_current_version; + uint32_t _dylib_compatibility_version; +public: + TRawSymbolOwnerData() : + _TEXT_text_section(0, 0, __null), _dsym_path(__null), _dylib_current_version(0), _dylib_compatibility_version(0) {} +}; +template <typename SIZE_AND_ENDIANNESS> class TExtendedMachOHeader {}; +# 16 "decl_should_be_here.hpp" +template <typename SIZE_AND_ENDIANNESS> void extract_dwarf_data_from_header(TExtendedMachOHeader<SIZE_AND_ENDIANNESS>& header, + TRawSymbolOwnerData<typename SIZE_AND_ENDIANNESS::SIZE>& symbol_owner_data, + TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>* address_relocator) {} +struct CSCppSymbolOwnerHashFunctor { + size_t operator()(const CSCppSymbolOwner& symbol_owner) const { +# 97 "wrong_place_for_decl.cpp" + } +}; +template <typename SIZE_AND_ENDIANNESS> CSCppSymbolOwnerData* create_symbol_owner_data_arch_specific(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + typedef typename SIZE_AND_ENDIANNESS::SIZE SIZE; + std::auto_ptr< TRawSymbolOwnerData<SIZE> > data(new TRawSymbolOwnerData<SIZE>()); + std::auto_ptr< TExtendedMachOHeader<SIZE_AND_ENDIANNESS> > header; + extract_dwarf_data_from_header(*header, *data, (TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>*)__null); +} +CSCppSymbolOwnerData* create_symbol_owner_data2(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + create_symbol_owner_data_arch_specific< ISA32Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA32Big >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Big >(symbol_owner, dsym_path); +} diff --git a/test/FrontendC++/2010-08-31-ByValArg.cpp b/test/FrontendC++/2010-08-31-ByValArg.cpp new file mode 100644 index 0000000000000..be0d354b1d983 --- /dev/null +++ b/test/FrontendC++/2010-08-31-ByValArg.cpp @@ 
-0,0 +1,53 @@ +// This regression test checks byval arguments' debug info. +// Radar 8367011 +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe +// RUN: echo {break get\nrun\np missing_arg.b} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RUN: grep {1 = 4242} + +// XTARGET: x86_64-apple-darwin + +class EVT { +public: + int a; + int b; + int c; +}; + +class VAL { +public: + int x; + int y; +}; +void foo(EVT e); +EVT bar(); + +void get(int *i, unsigned dl, VAL v, VAL *p, unsigned n, EVT missing_arg) { +//CHECK: .ascii "missing_arg" + EVT e = bar(); + if (dl == n) + foo(missing_arg); +} + + +EVT bar() { + EVT e; + return e; +} + +void foo(EVT e) {} + +int main(){ + VAL v; + EVT ma; + ma.a = 1; + ma.b = 4242; + ma.c = 3; + int i = 42; + get (&i, 1, v, &v, 2, ma); + return 0; +} + diff --git a/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/test/FrontendC/2008-03-24-BitField-And-Alloca.c index 291f036523a42..641bcf1dbeb9e 100644 --- a/test/FrontendC/2008-03-24-BitField-And-Alloca.c +++ b/test/FrontendC/2008-03-24-BitField-And-Alloca.c @@ -1,5 +1,5 @@ // RUN: %llvmgcc -O2 -S %s -o - | not grep alloca -// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep store +// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep {store } enum { PP_C, diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c index 34abbe3e5c5cd..12e91405d10ff 100644 --- a/test/FrontendC/2010-05-18-asmsched.c +++ b/test/FrontendC/2010-05-18-asmsched.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -c -O3 -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s +// RUN: %llvmgcc %s -c -O3 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s // r9 used to be clobbered before its value was moved to r10. 7993104. 
void foo(int x, int y) { @@ -14,4 +14,4 @@ void foo(int x, int y) { lr9 = x; lr10 = foo; asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10)); -}
\ No newline at end of file +} diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c index 65fbdb83003b7..1744ba84185d3 100644 --- a/test/FrontendC/2010-07-14-overconservative-align.c +++ b/test/FrontendC/2010-07-14-overconservative-align.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -emit-llvm -m64 -S -o - | FileCheck %s +// RUN: %llvmgcc %s -emit-llvm -S -o - | FileCheck %s // PR 5995 struct s { int word; @@ -9,6 +9,6 @@ struct s { void func (struct s *s) { -// CHECK: load %struct.s** %s_addr, align 8 +// CHECK: load %struct.s** %s_addr, align {{[48]}} s->word = 0; } diff --git a/test/FrontendC/2010-07-14-ref-off-end.c b/test/FrontendC/2010-07-14-ref-off-end.c index 6ccd05b770e94..c7fdd95a7aa0e 100644 --- a/test/FrontendC/2010-07-14-ref-off-end.c +++ b/test/FrontendC/2010-07-14-ref-off-end.c @@ -17,8 +17,8 @@ return(char)s->c; } main() { -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 struct T t; t.i=0xff; t.c=0xffff11; diff --git a/test/FrontendC/2010-07-27-MinNoFoldConst.c b/test/FrontendC/2010-07-27-MinNoFoldConst.c new file mode 100644 index 0000000000000..7cd8b4c43764d --- /dev/null +++ b/test/FrontendC/2010-07-27-MinNoFoldConst.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | FileCheck %s +extern int printf(const char *, ...); +static void bad(unsigned int v1, unsigned int v2) { + printf("%u\n", 1631381461u * (((v2 - 1273463329u <= v1 - 1273463329u) ? 
v2 : v1) - 1273463329u) + 121322179u); +} +// Radar 8198362 +// GCC FE wants to convert the above to +// 1631381461u * MIN(v2 - 1273463329u, v1 - 1273463329u) +// and then to +// MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419) +// +// 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips +// this to 4047041419. This breaks the comparision implicit in the MIN(). +// Two multiply operations suggests the bad optimization is happening; +// one multiplication, after the MIN(), is correct. +// CHECK: mul +// CHECK-NOT: mul +// CHECK: ret diff --git a/test/FrontendC/2010-08-12-asm-aggr-arg.c b/test/FrontendC/2010-08-12-asm-aggr-arg.c new file mode 100644 index 0000000000000..81ec14b288263 --- /dev/null +++ b/test/FrontendC/2010-08-12-asm-aggr-arg.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// Radar 8288710: A small aggregate can be passed as an integer. Make sure +// we don't get an error with "input constraint with a matching output +// constraint of incompatible type!" + +struct wrapper { + int i; +}; + +// CHECK: xyz +int test(int i) { + struct wrapper w; + w.i = i; + __asm__("xyz" : "=r" (w) : "0" (w)); + return w.i; +} diff --git a/test/FrontendC/asm-reg-var-local.c b/test/FrontendC/asm-reg-var-local.c new file mode 100644 index 0000000000000..22bd43c076d28 --- /dev/null +++ b/test/FrontendC/asm-reg-var-local.c @@ -0,0 +1,32 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Exercise various use cases for local asm "register variables". 
+// XFAIL: * +// XTARGET: x86_64,i686,i386 + +int foo() { +// CHECK: %a = alloca i32 + + register int a asm("rsi")=5; +// CHECK: store i32 5, i32* %a, align 4 + + asm volatile("; %0 This asm defines rsi" : "=r"(a)); +// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi} +// CHECK: store i32 %asmtmp, i32* %a + + a = 42; +// CHECK: store i32 42, i32* %a, align 4 + + asm volatile("; %0 This asm uses rsi" : : "r"(a)); +// CHECK: %1 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind +// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2) + + return a; +// CHECK: %3 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind +// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: store i32 %4, i32* %0, align 4 +// CHECK: %5 = load i32* %0, align 4 +// CHECK: store i32 %5, i32* %retval, align 4 +} diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c index b9ec281f5677d..764126e02184f 100644 --- a/test/FrontendC/cstring-align.c +++ b/test/FrontendC/cstring-align.c @@ -1,6 +1,4 @@ -// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32 -// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64 -// XTARGET: darwin +// RUN: %llvmgcc %s -c -Os -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s extern void func(const char *, const char *); @@ -8,10 +6,6 @@ void long_function_name() { func("%s: the function name", __func__); } -// DARWIN64: .align 4 -// DARWIN64: ___func__. -// DARWIN64: .asciz "long_function_name" - -// DARWIN32: .align 4 -// DARWIN32: ___func__. -// DARWIN32: .asciz "long_function_name" +// CHECK: .align 4 +// CHECK: ___func__. 
+// CHECK: .asciz "long_function_name" diff --git a/test/FrontendC/misaligned-param.c b/test/FrontendC/misaligned-param.c new file mode 100644 index 0000000000000..b4fcfe312f5af --- /dev/null +++ b/test/FrontendC/misaligned-param.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -m32 -S -o - | FileCheck %s +// Misaligned parameter must be memcpy'd to correctly aligned temporary. +// XFAIL: * +// XTARGET: i386-apple-darwin,i686-apple-darwin,x86_64-apple-darwin + +struct s { int x; long double y; }; +long double foo(struct s x, int i, struct s y) { +// CHECK: foo +// CHECK: %x_addr = alloca %struct.s, align 16 +// CHECK: %y_addr = alloca %struct.s, align 16 +// CHECK: memcpy +// CHECK: memcpy +// CHECK: bar + return bar(&x, &y); +} diff --git a/test/FrontendC/vla-1.c b/test/FrontendC/vla-1.c index 76f6c53c1e16f..77f78a5e3af74 100644 --- a/test/FrontendC/vla-1.c +++ b/test/FrontendC/vla-1.c @@ -1,5 +1,6 @@ -// RUN: true -// %llvmgcc -std=gnu99 %s -S |& grep {error: "is greater than the stack alignment" } +// RUN: %llvmgcc_only -std=gnu99 %s -S |& grep {warning: alignment for} +// ppc does not support this feature, and gets a fatal error at runtime. +// XFAIL: powerpc int foo(int a) { diff --git a/test/FrontendC/vla-2.c b/test/FrontendC/vla-2.c new file mode 100644 index 0000000000000..555cfc789250b --- /dev/null +++ b/test/FrontendC/vla-2.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16" + +extern void bar(int[]); + +void foo(int a) +{ + int var[a] __attribute__((__aligned__(16))); + bar(var); + return; +} diff --git a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm new file mode 100644 index 0000000000000..298844e97b5d3 --- /dev/null +++ b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm @@ -0,0 +1,27 @@ +// RUN: not %llvmgcc %s -S -emit-llvm -o - |& FileCheck %s +// This tests for a specific diagnostic in LLVM-GCC. 
+// Clang compiles this correctly with no diagnostic, +// ergo this test will fail with a Clang-based front-end. +class TFENodeVector { +public: + TFENodeVector(const TFENodeVector& inNodeVector); + TFENodeVector(); +}; + +@interface TWindowHistoryEntry {} +@property (assign, nonatomic) TFENodeVector targetPath; +@end + +@implementation TWindowHistoryEntry +@synthesize targetPath; +- (void) initWithWindowController { + TWindowHistoryEntry* entry; + TFENodeVector newPath; + // CHECK: setting a C++ non-POD object value is not implemented +#ifdef __clang__ +#error setting a C++ non-POD object value is not implemented +#endif + entry.targetPath = newPath; + [entry setTargetPath:newPath]; +} +@end diff --git a/test/FrontendObjC++/2010-08-04-Template.mm b/test/FrontendObjC++/2010-08-04-Template.mm new file mode 100644 index 0000000000000..d0383406d7e27 --- /dev/null +++ b/test/FrontendObjC++/2010-08-04-Template.mm @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TRunSoon { + template <class P1> static void Post() {} +}; + +@implementation TPrivsTableViewMainController +- (void) applyToEnclosed { + TRunSoon::Post<int>(); +} +@end diff --git a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm new file mode 100644 index 0000000000000..b33d7307af49e --- /dev/null +++ b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TFENode { + TFENode(const TFENode& inNode); +}; + +@interface TIconViewController +- (const TFENode&) target; +@end + +void sortAllChildrenForNode(const TFENode&node); + +@implementation TIconViewController +- (void) setArrangeBy { + sortAllChildrenForNode(self.target); +} +@end diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m index 2c72e9532ceb8..8ed7c24dc1348 100644 --- a/test/FrontendObjC/2009-08-17-DebugInfo.m +++ b/test/FrontendObjC/2009-08-17-DebugInfo.m @@ -5,7 +5,7 @@ // RUN: %link %t.o -o %t.exe 
-framework Foundation // RUN: echo {break randomFunc\n} > %t.in // RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ -// RUN: grep {Breakpoint 1 at 0x.*: file 2009-08-17-DebugInfo.m, line 21} +// RUN: grep {Breakpoint 1 at 0x.*: file .*2009-08-17-DebugInfo.m, line 21} // XTARGET: darwin @interface MyClass { diff --git a/test/Integer/a15.ll b/test/Integer/a15.ll deleted file mode 100644 index 5c9dc3b1be0d9..0000000000000 --- a/test/Integer/a15.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 15 bits -; -@b = constant i15 add(i15 32767, i15 1) -@c = constant i15 add(i15 32767, i15 32767) -@d = constant i15 add(i15 32760, i15 8) -@e = constant i15 sub(i15 0 , i15 1) -@f = constant i15 sub(i15 0 , i15 32767) -@g = constant i15 sub(i15 2 , i15 32767) - -@h = constant i15 shl(i15 1 , i15 15) -@i = constant i15 shl(i15 1 , i15 14) -@j = constant i15 lshr(i15 32767 , i15 14) -@l = constant i15 ashr(i15 32767 , i15 14) - -@n = constant i15 mul(i15 32767, i15 2) -@q = constant i15 mul(i15 -16383,i15 -3) -@r = constant i15 sdiv(i15 -1, i15 16383) -@s = constant i15 udiv(i15 -1, i15 16383) -@t = constant i15 srem(i15 1, i15 32766) -@u = constant i15 urem(i15 32767,i15 -1) -@o = constant i15 trunc( i16 32768 to i15 ) -@p = constant i15 trunc( i16 32767 to i15 ) -@v = constant i15 srem(i15 -1, i15 768) - diff --git a/test/Integer/a15.ll.out b/test/Integer/a15.ll.out deleted file mode 100644 index 5195cdf3761f0..0000000000000 --- a/test/Integer/a15.ll.out +++ /dev/null @@ -1,21 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i15 0 ; <i15*> [#uses=0] -@c = constant i15 -2 ; <i15*> [#uses=0] -@d = constant i15 0 ; <i15*> [#uses=0] -@e = constant i15 -1 ; <i15*> [#uses=0] -@f = constant i15 1 ; <i15*> [#uses=0] -@g = constant i15 3 ; <i15*> [#uses=0] -@h = constant i15 undef ; <i15*> [#uses=0] -@i = constant i15 -16384 ; <i15*> [#uses=0] -@j = constant i15 1 ; <i15*> [#uses=0] -@l = constant i15 -1 ; <i15*> 
[#uses=0] -@n = constant i15 -2 ; <i15*> [#uses=0] -@q = constant i15 16381 ; <i15*> [#uses=0] -@r = constant i15 0 ; <i15*> [#uses=0] -@s = constant i15 2 ; <i15*> [#uses=0] -@t = constant i15 1 ; <i15*> [#uses=0] -@u = constant i15 0 ; <i15*> [#uses=0] -@o = constant i15 0 ; <i15*> [#uses=0] -@p = constant i15 -1 ; <i15*> [#uses=0] -@v = constant i15 -1 ; <i15*> [#uses=0] diff --git a/test/Integer/a17.ll b/test/Integer/a17.ll deleted file mode 100644 index db03e7c6be0d3..0000000000000 --- a/test/Integer/a17.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 17 bits -; -@b = constant i17 add(i17 131071, i17 1) -@c = constant i17 add(i17 131071, i17 131071) -@d = constant i17 add(i17 131064, i17 8) -@e = constant i17 sub(i17 0 , i17 1) -@f = constant i17 sub(i17 0 , i17 131071) -@g = constant i17 sub(i17 2 , i17 131071) - -@h = constant i17 shl(i17 1 , i17 17) -@i = constant i17 shl(i17 1 , i17 16) -@j = constant i17 lshr(i17 131071 , i17 16) -@l = constant i17 ashr(i17 131071 , i17 16) - -@n = constant i17 mul(i17 131071, i17 2) -@q = constant i17 sdiv(i17 -1, i17 65535) -@r = constant i17 udiv(i17 -1, i17 65535) -@s = constant i17 srem(i17 1, i17 131070) -@t = constant i17 urem(i17 131071,i17 -1) -@o = constant i17 trunc( i18 131072 to i17 ) -@p = constant i17 trunc( i18 131071 to i17 ) -@v = constant i17 srem(i17 -1, i17 15) diff --git a/test/Integer/a17.ll.out b/test/Integer/a17.ll.out deleted file mode 100644 index ba6641289e316..0000000000000 --- a/test/Integer/a17.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i17 0 ; <i17*> [#uses=0] -@c = constant i17 -2 ; <i17*> [#uses=0] -@d = constant i17 0 ; <i17*> [#uses=0] -@e = constant i17 -1 ; <i17*> [#uses=0] -@f = constant i17 1 ; <i17*> [#uses=0] -@g = constant i17 3 ; <i17*> [#uses=0] -@h = constant i17 undef ; <i17*> [#uses=0] -@i = constant i17 -65536 ; <i17*> [#uses=0] -@j = constant i17 1 ; <i17*> [#uses=0] -@l = 
constant i17 -1 ; <i17*> [#uses=0] -@n = constant i17 -2 ; <i17*> [#uses=0] -@q = constant i17 0 ; <i17*> [#uses=0] -@r = constant i17 2 ; <i17*> [#uses=0] -@s = constant i17 1 ; <i17*> [#uses=0] -@t = constant i17 0 ; <i17*> [#uses=0] -@o = constant i17 0 ; <i17*> [#uses=0] -@p = constant i17 -1 ; <i17*> [#uses=0] -@v = constant i17 -1 ; <i17*> [#uses=0] diff --git a/test/Integer/a31.ll b/test/Integer/a31.ll deleted file mode 100644 index c0c571f630680..0000000000000 --- a/test/Integer/a31.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 31 bits -; -@b = constant i31 add(i31 2147483647, i31 1) -@c = constant i31 add(i31 2147483647, i31 2147483647) -@d = constant i31 add(i31 2147483640, i31 8) -@e = constant i31 sub(i31 0 , i31 1) -@f = constant i31 sub(i31 0 , i31 2147483647) -@g = constant i31 sub(i31 2 , i31 2147483647) - -@h = constant i31 shl(i31 1 , i31 31) -@i = constant i31 shl(i31 1 , i31 30) -@j = constant i31 lshr(i31 2147483647 , i31 30) -@l = constant i31 ashr(i31 2147483647 , i31 30) - -@n = constant i31 mul(i31 2147483647, i31 2) -@q = constant i31 sdiv(i31 -1, i31 1073741823) -@r = constant i31 udiv(i31 -1, i31 1073741823) -@s = constant i31 srem(i31 1, i31 2147483646) -@t = constant i31 urem(i31 2147483647,i31 -1) -@o = constant i31 trunc( i32 2147483648 to i31 ) -@p = constant i31 trunc( i32 2147483647 to i31 ) -@u = constant i31 srem(i31 -3, i31 17) diff --git a/test/Integer/a31.ll.out b/test/Integer/a31.ll.out deleted file mode 100644 index 7407a746b5bf7..0000000000000 --- a/test/Integer/a31.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i31 0 ; <i31*> [#uses=0] -@c = constant i31 -2 ; <i31*> [#uses=0] -@d = constant i31 0 ; <i31*> [#uses=0] -@e = constant i31 -1 ; <i31*> [#uses=0] -@f = constant i31 1 ; <i31*> [#uses=0] -@g = constant i31 3 ; <i31*> [#uses=0] -@h = constant i31 undef ; <i31*> [#uses=0] -@i = constant i31 -1073741824 ; <i31*> 
[#uses=0] -@j = constant i31 1 ; <i31*> [#uses=0] -@l = constant i31 -1 ; <i31*> [#uses=0] -@n = constant i31 -2 ; <i31*> [#uses=0] -@q = constant i31 0 ; <i31*> [#uses=0] -@r = constant i31 2 ; <i31*> [#uses=0] -@s = constant i31 1 ; <i31*> [#uses=0] -@t = constant i31 0 ; <i31*> [#uses=0] -@o = constant i31 0 ; <i31*> [#uses=0] -@p = constant i31 -1 ; <i31*> [#uses=0] -@u = constant i31 -3 ; <i31*> [#uses=0] diff --git a/test/Integer/a33.ll b/test/Integer/a33.ll deleted file mode 100644 index f328907b46087..0000000000000 --- a/test/Integer/a33.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 33 bits -; -@b = constant i33 add(i33 8589934591, i33 1) -@c = constant i33 add(i33 8589934591, i33 8589934591) -@d = constant i33 add(i33 8589934584, i33 8) -@e = constant i33 sub(i33 0 , i33 1) -@f = constant i33 sub(i33 0 , i33 8589934591) -@g = constant i33 sub(i33 2 , i33 8589934591) - -@h = constant i33 shl(i33 1 , i33 33) -@i = constant i33 shl(i33 1 , i33 32) -@j = constant i33 lshr(i33 8589934591 , i33 32) -@l = constant i33 ashr(i33 8589934591 , i33 32) - -@n = constant i33 mul(i33 8589934591, i33 2) -@q = constant i33 sdiv(i33 -1, i33 4294967295) -@r = constant i33 udiv(i33 -1, i33 4294967295) -@s = constant i33 srem(i33 1, i33 8589934590) -@t = constant i33 urem(i33 8589934591,i33 -1) -@o = constant i33 trunc( i34 8589934592 to i33 ) -@p = constant i33 trunc( i34 8589934591 to i33 ) -@u = constant i33 srem(i33 -1, i33 17) - diff --git a/test/Integer/a33.ll.out b/test/Integer/a33.ll.out deleted file mode 100644 index 6cd61ee69cdca..0000000000000 --- a/test/Integer/a33.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i33 0 ; <i33*> [#uses=0] -@c = constant i33 -2 ; <i33*> [#uses=0] -@d = constant i33 0 ; <i33*> [#uses=0] -@e = constant i33 -1 ; <i33*> [#uses=0] -@f = constant i33 1 ; <i33*> [#uses=0] -@g = constant i33 3 ; <i33*> [#uses=0] -@h = constant i33 undef ; <i33*> 
[#uses=0] -@i = constant i33 -4294967296 ; <i33*> [#uses=0] -@j = constant i33 1 ; <i33*> [#uses=0] -@l = constant i33 -1 ; <i33*> [#uses=0] -@n = constant i33 -2 ; <i33*> [#uses=0] -@q = constant i33 0 ; <i33*> [#uses=0] -@r = constant i33 2 ; <i33*> [#uses=0] -@s = constant i33 1 ; <i33*> [#uses=0] -@t = constant i33 0 ; <i33*> [#uses=0] -@o = constant i33 0 ; <i33*> [#uses=0] -@p = constant i33 -1 ; <i33*> [#uses=0] -@u = constant i33 -1 ; <i33*> [#uses=0] diff --git a/test/Integer/a63.ll b/test/Integer/a63.ll deleted file mode 100644 index 052ecd585002d..0000000000000 --- a/test/Integer/a63.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 63 bits -; -@b = constant i63 add(i63 9223372036854775807, i63 1) -@c = constant i63 add(i63 9223372036854775807, i63 9223372036854775807) -@d = constant i63 add(i63 9223372036854775800, i63 8) -@e = constant i63 sub(i63 0 , i63 1) -@f = constant i63 sub(i63 0 , i63 9223372036854775807) -@g = constant i63 sub(i63 2 , i63 9223372036854775807) - -@h = constant i63 shl(i63 1 , i63 63) -@i = constant i63 shl(i63 1 , i63 62) -@j = constant i63 lshr(i63 9223372036854775807 , i63 62) -@l = constant i63 ashr(i63 9223372036854775807 , i63 62) - -@n = constant i63 mul(i63 9223372036854775807, i63 2) -@q = constant i63 sdiv(i63 -1, i63 4611686018427387903) -@u = constant i63 sdiv(i63 -1, i63 1) -@r = constant i63 udiv(i63 -1, i63 4611686018427387903) -@s = constant i63 srem(i63 3, i63 9223372036854775806) -@t = constant i63 urem(i63 9223372036854775807,i63 -1) -@o = constant i63 trunc( i64 9223372036854775808 to i63 ) -@p = constant i63 trunc( i64 9223372036854775807 to i63 ) diff --git a/test/Integer/a63.ll.out b/test/Integer/a63.ll.out deleted file mode 100644 index 18dff5a2964ef..0000000000000 --- a/test/Integer/a63.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i63 0 ; <i63*> [#uses=0] -@c = constant i63 -2 ; <i63*> [#uses=0] -@d = 
constant i63 0 ; <i63*> [#uses=0] -@e = constant i63 -1 ; <i63*> [#uses=0] -@f = constant i63 1 ; <i63*> [#uses=0] -@g = constant i63 3 ; <i63*> [#uses=0] -@h = constant i63 undef ; <i63*> [#uses=0] -@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0] -@j = constant i63 1 ; <i63*> [#uses=0] -@l = constant i63 -1 ; <i63*> [#uses=0] -@n = constant i63 -2 ; <i63*> [#uses=0] -@q = constant i63 0 ; <i63*> [#uses=0] -@u = constant i63 -1 ; <i63*> [#uses=0] -@r = constant i63 2 ; <i63*> [#uses=0] -@s = constant i63 1 ; <i63*> [#uses=0] -@t = constant i63 0 ; <i63*> [#uses=0] -@o = constant i63 0 ; <i63*> [#uses=0] -@p = constant i63 -1 ; <i63*> [#uses=0] diff --git a/test/Integer/a7.ll b/test/Integer/a7.ll deleted file mode 100644 index 1edb35f9104b7..0000000000000 --- a/test/Integer/a7.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 7 bits -; -@b = constant i7 add(i7 127, i7 1) -@q = constant i7 add(i7 -64, i7 -1) -@c = constant i7 add(i7 127, i7 127) -@d = constant i7 add(i7 120, i7 8) -@e = constant i7 sub(i7 0 , i7 1) -@f = constant i7 sub(i7 0 , i7 127) -@g = constant i7 sub(i7 2 , i7 127) -@r = constant i7 sub(i7 -3, i7 120) -@s = constant i7 sub(i7 -3, i7 -8) - -@h = constant i7 shl(i7 1 , i7 7) -@i = constant i7 shl(i7 1 , i7 6) -@j = constant i7 lshr(i7 127 , i7 6) -@l = constant i7 ashr(i7 127 , i7 6) -@m2= constant i7 ashr(i7 -1 , i7 3) - -@n = constant i7 mul(i7 127, i7 2) -@t = constant i7 mul(i7 -63, i7 -2) -@u = constant i7 mul(i7 -32, i7 2) -@v = constant i7 sdiv(i7 -1, i7 63) -@w = constant i7 udiv(i7 -1, i7 63) -@x = constant i7 srem(i7 1 , i7 126) -@y = constant i7 urem(i7 127, i7 -1) -@o = constant i7 trunc( i8 128 to i7 ) -@p = constant i7 trunc( i8 255 to i7 ) - diff --git a/test/Integer/a7.ll.out b/test/Integer/a7.ll.out deleted file mode 100644 index 250925d795e64..0000000000000 --- a/test/Integer/a7.ll.out +++ /dev/null @@ -1,25 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant 
i7 0 ; <i7*> [#uses=0] -@q = constant i7 63 ; <i7*> [#uses=0] -@c = constant i7 -2 ; <i7*> [#uses=0] -@d = constant i7 0 ; <i7*> [#uses=0] -@e = constant i7 -1 ; <i7*> [#uses=0] -@f = constant i7 1 ; <i7*> [#uses=0] -@g = constant i7 3 ; <i7*> [#uses=0] -@r = constant i7 5 ; <i7*> [#uses=0] -@s = constant i7 5 ; <i7*> [#uses=0] -@h = constant i7 undef ; <i7*> [#uses=0] -@i = constant i7 -64 ; <i7*> [#uses=0] -@j = constant i7 1 ; <i7*> [#uses=0] -@l = constant i7 -1 ; <i7*> [#uses=0] -@m2 = constant i7 -1 ; <i7*> [#uses=0] -@n = constant i7 -2 ; <i7*> [#uses=0] -@t = constant i7 -2 ; <i7*> [#uses=0] -@u = constant i7 -64 ; <i7*> [#uses=0] -@v = constant i7 0 ; <i7*> [#uses=0] -@w = constant i7 2 ; <i7*> [#uses=0] -@x = constant i7 1 ; <i7*> [#uses=0] -@y = constant i7 0 ; <i7*> [#uses=0] -@o = constant i7 0 ; <i7*> [#uses=0] -@p = constant i7 -1 ; <i7*> [#uses=0] diff --git a/test/Integer/a9.ll b/test/Integer/a9.ll deleted file mode 100644 index 711ec821c295f..0000000000000 --- a/test/Integer/a9.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 9 bits -; -@b = constant i9 add(i9 511, i9 1) -@c = constant i9 add(i9 511, i9 511) -@d = constant i9 add(i9 504, i9 8) -@e = constant i9 sub(i9 0 , i9 1) -@f = constant i9 sub(i9 0 , i9 511) -@g = constant i9 sub(i9 2 , i9 511) - -@h = constant i9 shl(i9 1 , i9 9) -@i = constant i9 shl(i9 1 , i9 8) -@j = constant i9 lshr(i9 511 , i9 8) -@l = constant i9 ashr(i9 511 , i9 8) - -@n = constant i9 mul(i9 511, i9 2) -@q = constant i9 sdiv(i9 511, i9 2) -@r = constant i9 udiv(i9 511, i9 2) -@s = constant i9 urem(i9 511, i9 -1) -@t = constant i9 srem(i9 1, i9 510) -@o = constant i9 trunc( i10 512 to i9 ) -@p = constant i9 trunc( i10 511 to i9 ) - diff --git a/test/Integer/a9.ll.out b/test/Integer/a9.ll.out deleted file mode 100644 index 6e38062c4a035..0000000000000 --- a/test/Integer/a9.ll.out +++ /dev/null @@ -1,19 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i9 0 
; <i9*> [#uses=0] -@c = constant i9 -2 ; <i9*> [#uses=0] -@d = constant i9 0 ; <i9*> [#uses=0] -@e = constant i9 -1 ; <i9*> [#uses=0] -@f = constant i9 1 ; <i9*> [#uses=0] -@g = constant i9 3 ; <i9*> [#uses=0] -@h = constant i9 undef ; <i9*> [#uses=0] -@i = constant i9 -256 ; <i9*> [#uses=0] -@j = constant i9 1 ; <i9*> [#uses=0] -@l = constant i9 -1 ; <i9*> [#uses=0] -@n = constant i9 -2 ; <i9*> [#uses=0] -@q = constant i9 0 ; <i9*> [#uses=0] -@r = constant i9 255 ; <i9*> [#uses=0] -@s = constant i9 0 ; <i9*> [#uses=0] -@t = constant i9 1 ; <i9*> [#uses=0] -@o = constant i9 0 ; <i9*> [#uses=0] -@p = constant i9 -1 ; <i9*> [#uses=0] diff --git a/test/LLVMC/Alias.td b/test/LLVMC/Alias.td new file mode 100644 index 0000000000000..5d37889304bdc --- /dev/null +++ b/test/LLVMC/Alias.td @@ -0,0 +1,24 @@ +// Test alias generation. +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ + +(switch_option "dummy1", (help "none")), +// CHECK: cl::alias Alias_dummy2 +(alias_option "dummy2", "dummy1") +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td index 254d5eaf37af7..c85f002e6e8b4 100644 --- a/test/LLVMC/AppendCmdHook.td +++ b/test/LLVMC/AppendCmdHook.td @@ -1,7 +1,7 @@ // Check that hooks can be invoked from 'append_cmd'. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -26,4 +26,4 @@ def dummy_tool : Tool<[ (switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td index e5d5e9a64cdbb..a52b8a8c19905 100644 --- a/test/LLVMC/EmptyCompilationGraph.td +++ b/test/LLVMC/EmptyCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph can be empty. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td index 86091db9bdfb8..ce0cb824604ce 100644 --- a/test/LLVMC/EnvParentheses.td +++ b/test/LLVMC/EnvParentheses.td @@ -2,7 +2,7 @@ // http://llvm.org/bugs/show_bug.cgi?id=4157 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: not grep {FOO")));} %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,6 +13,6 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; def Graph : CompilationGraph<[]>; diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td deleted file mode 100644 index d84ea847bf128..0000000000000 --- a/test/LLVMC/ExternOptions.td +++ /dev/null @@ -1,26 +0,0 @@ -// Check that extern options work. -// The dummy tool and graph are required to silence warnings. 
-// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: vg_leak - -include "llvm/CompilerDriver/Common.td" - -// CHECK: extern cl::opt<bool> AutoGeneratedSwitch_Wall - -def OptList : OptionList<[(switch_option "Wall", (extern)), - (parameter_option "std", (extern)), - (prefix_list_option "L", (extern))]>; - -def dummy_tool : Tool<[ -(command "dummy_cmd"), -(in_language "dummy"), -(out_language "dummy"), -(actions (case - (switch_on "Wall"), (stop_compilation), - (not_empty "std"), (stop_compilation), - (not_empty "L"), (stop_compilation))) -]>; - -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td index 536b96a9758fa..99b240e30fb3e 100644 --- a/test/LLVMC/ForwardAs.td +++ b/test/LLVMC/ForwardAs.td @@ -2,12 +2,12 @@ // http://llvm.org/bugs/show_bug.cgi?id=4159 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "dummy", (extern))]>; +def OptList : OptionList<[(parameter_option "dummy", (help "dummmy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -18,4 +18,4 @@ def dummy_tool : Tool<[ (not_empty "dummy"), (forward_as "dummy", "unique_name"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td index 5e0bf290d1fd1..9184ede361018 100644 --- a/test/LLVMC/ForwardTransformedValue.td +++ b/test/LLVMC/ForwardTransformedValue.td @@ -2,13 +2,13 @@ // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; // CHECK: std::string HookA // CHECK: std::string HookB @@ -18,10 +18,10 @@ def dummy_tool : Tool<[ (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: HookA(AutoGeneratedParameter_a + // CHECK: HookA(autogenerated::Parameter_a (not_empty "a"), (forward_transformed_value "a", "HookA"), - // CHECK: HookB(AutoGeneratedList_b + // CHECK: HookB(autogenerated::List_b (not_empty "b"), (forward_transformed_value "b", "HookB"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td index 4c7a0ee0ec5e0..a42a3f06ec3d7 100644 --- a/test/LLVMC/ForwardValue.td +++ b/test/LLVMC/ForwardValue.td @@ -2,23 +2,23 @@ // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: , AutoGeneratedParameter_a)); + // CHECK: , autogenerated::Parameter_a)); (not_empty "a"), (forward_value "a"), - // CHECK: B = AutoGeneratedList_b.begin() + // CHECK: B = autogenerated::List_b.begin() (not_empty "b"), (forward_value "b"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td index 5ff96cd6a88d8..bbba2e9845999 100644 --- a/test/LLVMC/HookWithArguments.td +++ b/test/LLVMC/HookWithArguments.td @@ -1,7 +1,7 @@ // Check that hooks with arguments work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -17,4 +17,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td index 9855dbc5bd9a0..ed08b5321ccfb 100644 --- a/test/LLVMC/HookWithInFile.td +++ b/test/LLVMC/HookWithInFile.td @@ -1,7 +1,7 @@ // Check that a hook can be given $INFILE as an argument. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td index 05209bf61acae..c3846797026e4 100644 --- a/test/LLVMC/Init.td +++ b/test/LLVMC/Init.td @@ -1,7 +1,7 @@ // Check that (init true/false) and (init "str") work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "dummy2"), (forward "dummy2"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/LanguageMap.td b/test/LLVMC/LanguageMap.td new file mode 100644 index 0000000000000..a0502142e6d72 --- /dev/null +++ b/test/LLVMC/LanguageMap.td @@ -0,0 +1,29 @@ +// Check that LanguageMap is processed properly. 
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ +(switch_option "dummy1", (help "none")) +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def lang_map : LanguageMap<[ + // CHECK: langMap["dummy"] = "dummy_lang" + // CHECK: langMap["DUM"] = "dummy_lang" + (lang_to_suffixes "dummy_lang", ["dummy", "DUM"]), + // CHECK: langMap["DUM2"] = "dummy_lang_2" + (lang_to_suffixes "dummy_lang_2", "DUM2") +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td index 73ccb6311f3cd..08c753380d475 100644 --- a/test/LLVMC/MultiValuedOption.td +++ b/test/LLVMC/MultiValuedOption.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -10,7 +10,7 @@ include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ // CHECK: cl::multi_val(2) (prefix_list_option "foo", (multi_val 2)), - (parameter_list_option "baz", (multi_val 2), (extern))]>; + (parameter_list_option "baz", (multi_val 2))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td index 86cd6131243a5..b3746c03b6cb5 100644 --- a/test/LLVMC/MultipleCompilationGraphs.td +++ b/test/LLVMC/MultipleCompilationGraphs.td @@ -1,6 +1,6 @@ // Check that multiple compilation graphs are allowed. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/MultiplePluginPriorities.td b/test/LLVMC/MultiplePluginPriorities.td deleted file mode 100644 index 2fe06450eecb4..0000000000000 --- a/test/LLVMC/MultiplePluginPriorities.td +++ /dev/null @@ -1,17 +0,0 @@ -// Check that multiple plugin priorities are not allowed. -// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "More than one 'PluginPriority' instance found" - -// Disable for Darwin PPC: <rdar://problem/7598390> -// XFAIL: powerpc-apple-darwin - -// Generally XFAIL'ed for now, this is (sometimes?) failing on x86_64-apple-darwin10. 
-// RUN: false -// XFAIL: * - -include "llvm/CompilerDriver/Common.td" - -def Graph : CompilationGraph<[]>; - -def Priority1 : PluginPriority<1>; - -def Priority2 : PluginPriority<2>; diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td index a80bcfe6ce1cc..34b444066350c 100644 --- a/test/LLVMC/NoActions.td +++ b/test/LLVMC/NoActions.td @@ -1,7 +1,7 @@ // Check that tools without associated actions are accepted. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td index 69df70133307f..4182882c451f8 100644 --- a/test/LLVMC/NoCompilationGraph.td +++ b/test/LLVMC/NoCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph is not required. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td index 37fbc87fdfabc..54fa62d1ff041 100644 --- a/test/LLVMC/OneOrMore.td +++ b/test/LLVMC/OneOrMore.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td index c2641be7e6456..8019c42634f30 100644 --- a/test/LLVMC/OptionPreprocessor.td +++ b/test/LLVMC/OptionPreprocessor.td @@ -1,7 +1,7 @@ // Test for the OptionPreprocessor and related functionality. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -63,5 +63,5 @@ def dummy : Tool< (not_empty "foo_l"), (error))) ]>; -def Graph : CompilationGraph<[Edge<"root", "dummy">]>; +def Graph : CompilationGraph<[(edge "root", "dummy")]>; diff --git a/test/LLVMC/OutputSuffixHook.td b/test/LLVMC/OutputSuffixHook.td index 4ecad2360ba06..1f5ecd1237f3d 100644 --- a/test/LLVMC/OutputSuffixHook.td +++ b/test/LLVMC/OutputSuffixHook.td @@ -1,8 +1,8 @@ // Check that hooks can be invoked from 'output_suffix'. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: * +// RUN: %compile_cxx %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,7 +13,7 @@ def OptList : OptionList<[ ]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd $INFILE"), +(command "dummy_cmd"), (in_language "dummy_lang"), (out_language "dummy_lang"), (actions (case @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (switch_on "dummy1"), (output_suffix "$CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td index 0388cb0b0f1e8..b0f57e97e0d8c 100644 --- a/test/LLVMC/TestWarnings.td +++ b/test/LLVMC/TestWarnings.td @@ -5,4 +5,4 @@ include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(switch_option "Wall", (extern))]>; +def OptList : OptionList<[(switch_option "Wall", (help "dummy"))]>; diff --git a/test/Linker/metadata-a.ll b/test/Linker/metadata-a.ll new file mode 100644 index 0000000000000..5a9d2e40b948f --- /dev/null +++ b/test/Linker/metadata-a.ll @@ -0,0 +1,15 @@ +; RUN: llvm-link %s %p/metadata-b.ll -S -o - | FileCheck %s + +; CHECK: define void @foo(i32 %a) +; CHECK: ret void, !attach !0, !also !{i32 %a} +; CHECK: define void @goo(i32 %b) +; CHECK: ret void, !attach !1, !and !{i32 %b} +; CHECK: !0 = metadata !{i32 524334, void (i32)* @foo} +; CHECK: !1 = metadata !{i32 524334, void (i32)* @goo} + +define void @foo(i32 %a) nounwind { +entry: + ret void, !attach !0, !also !{ i32 %a } +} + +!0 = metadata !{i32 524334, void (i32)* @foo} diff --git a/test/Linker/metadata-b.ll b/test/Linker/metadata-b.ll new file mode 100644 index 0000000000000..ef0270af07560 --- /dev/null +++ b/test/Linker/metadata-b.ll @@ -0,0 +1,9 @@ +; This file is for use with metadata-a.ll +; RUN: true + +define void @goo(i32 %b) nounwind { +entry: + 
ret void, !attach !0, !and !{ i32 %b } +} + +!0 = metadata !{i32 524334, void (i32)* @goo} diff --git a/test/MC/AsmParser/ARM/arm_instructions.s b/test/MC/AsmParser/ARM/arm_instructions.s new file mode 100644 index 0000000000000..8632cb0cefd82 --- /dev/null +++ b/test/MC/AsmParser/ARM/arm_instructions.s @@ -0,0 +1,8 @@ +@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s + +@ CHECK: nop + nop + +@ CHECK: nopeq + nopeq + diff --git a/test/MC/AsmParser/ELF/dg.exp b/test/MC/AsmParser/ELF/dg.exp new file mode 100644 index 0000000000000..ca6aefe9c53d3 --- /dev/null +++ b/test/MC/AsmParser/ELF/dg.exp @@ -0,0 +1,6 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} + diff --git a/test/MC/AsmParser/ELF/directive_previous.s b/test/MC/AsmParser/ELF/directive_previous.s new file mode 100644 index 0000000000000..5db1eac03d394 --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_previous.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +.bss +# CHECK: .bss + +.text +# CHECK: .text + +.previous +# CHECK: .bss + +.previous +# CHECK: .text diff --git a/test/MC/AsmParser/ELF/directive_section.s b/test/MC/AsmParser/ELF/directive_section.s new file mode 100644 index 0000000000000..9531c026e6745 --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_section.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + + .bss +# CHECK: .bss + + .data.rel.ro +# CHECK: .data.rel.ro + + .data.rel +# CHECK: .data.rel + + .eh_frame +# CHECK: .eh_frame + + .rodata +# CHECK: .rodata + + .tbss +# CHECK: .tbss + + .tdata +# CHECK: .tdata + diff --git a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s new file mode 100644 index 0000000000000..47bf980894d09 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | 
FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulhqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulhqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $1, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01] + vpclmulhqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $1, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01] + vpclmulhqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $16, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10] + vpclmullqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $16, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10] + vpclmullqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $0, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00] + vpclmullqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $0, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00] + vpclmullqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulqdq $17, %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulqdq $17, (%eax), %xmm5, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_32-avx-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-encoding.s new file mode 100644 index 0000000000000..b7ade6670a010 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-encoding.s @@ -0,0 +1,3241 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x58,0xd4] + vaddss %xmm4, %xmm6, %xmm2 + +// CHECK: vmulss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x59,0xd4] + vmulss %xmm4, %xmm6, %xmm2 + +// CHECK: vsubss %xmm4, %xmm6, %xmm2 +// 
CHECK: encoding: [0xc5,0xca,0x5c,0xd4] + vsubss %xmm4, %xmm6, %xmm2 + +// CHECK: vdivss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] + vdivss %xmm4, %xmm6, %xmm2 + +// CHECK: vaddsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] + vaddsd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] + vmulsd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] + vsubsd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] + vdivsd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivsd 3735928559(%ebx,%ecx,8), 
%xmm2, %xmm5 + +// CHECK: vaddps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] + vaddps %xmm4, %xmm6, %xmm2 + +// CHECK: vsubps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] + vsubps %xmm4, %xmm6, %xmm2 + +// CHECK: vmulps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] + vmulps %xmm4, %xmm6, %xmm2 + +// CHECK: vdivps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] + vdivps %xmm4, %xmm6, %xmm2 + +// CHECK: vaddpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] + vaddpd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] + vsubpd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] + vmulpd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] + vdivpd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, 
%xmm5 +// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: vmaxss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] + vmaxss %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] + vmaxsd %xmm2, %xmm4, %xmm6 + +// CHECK: vminss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] + vminss %xmm2, %xmm4, %xmm6 + +// CHECK: vminsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] + vminsd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] + vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] + vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] + vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] + vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] + vmaxps %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] + vmaxpd %xmm2, %xmm4, %xmm6 + +// CHECK: vminps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] + vminps %xmm2, %xmm4, %xmm6 + +// CHECK: vminpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] + vminpd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] + vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: 
[0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] + vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] + vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] + vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] + vandps %xmm2, %xmm4, %xmm6 + +// CHECK: vandpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] + vandpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] + vorps %xmm2, %xmm4, %xmm6 + +// CHECK: vorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] + vorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] + vxorps %xmm2, %xmm4, %xmm6 + +// CHECK: vxorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] + vxorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] + vandnps %xmm2, %xmm4, %xmm6 + +// CHECK: 
vandnpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] + vandnpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] + vmovss -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovss %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x10,0xec] + vmovss %xmm4, %xmm2, %xmm5 + +// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] + vmovsd -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovsd %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x10,0xec] + vmovsd %xmm4, %xmm2, %xmm5 + +// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] + vunpckhps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] + vunpckhpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] + vunpcklps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] + vunpcklpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: 
[0xc5,0xc8,0xc2,0xc8,0x00] + vcmpps $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] + vcmpps $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] + vcmpps $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] + vcmppd $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] + vcmppd $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] + vcmppd $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] + vshufps $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] + vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] + vshufpd $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] + vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] + vcmpeqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] + vcmpleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] + vcmpltps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] + vcmpneqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] + vcmpnleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] + vcmpnltps %xmm1, %xmm2, %xmm3 + 
+// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] + vcmpordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] + vcmpunordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] + vcmpeqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] + vcmplepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] + vcmpltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] + vcmpneqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $6, 
%xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] + vcmpnlepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] + vcmpnltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] + vcmpordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] + vcmpunordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xea,0xc2,0xd9,0x00] + vcmpeqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] + vcmpless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] + vcmpltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] + vcmpneqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] + vcmpnless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] + vcmpnltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] + vcmpordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] + vcmpunordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%ebx,%ecx,8), %xmm6, 
%xmm2 + +// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] + vcmpeqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] + vcmplesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] + vcmpltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] + vcmpneqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] + vcmpnlesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] + vcmpnltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] + vcmpordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] + vcmpunordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vucomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] + vucomiss %xmm1, %xmm2 + +// CHECK: vucomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] + vucomiss (%eax), %xmm2 + +// CHECK: vcomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] + vcomiss %xmm1, %xmm2 + +// CHECK: vcomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] + vcomiss (%eax), %xmm2 + +// CHECK: vucomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] + vucomisd %xmm1, %xmm2 + +// CHECK: vucomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] + vucomisd (%eax), %xmm2 + +// CHECK: vcomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] + vcomisd %xmm1, %xmm2 + +// CHECK: vcomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] + vcomisd (%eax), %xmm2 + +// CHECK: vcvttss2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] + vcvttss2si %xmm1, %eax + +// CHECK: vcvttss2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%ecx), %eax + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvttsd2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] + vcvttsd2si %xmm1, %eax + +// CHECK: vcvttsd2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%ecx), %eax + +// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: 
vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: vmovaps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0x10] + vmovaps (%eax), %xmm2 + +// CHECK: vmovaps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] + vmovaps %xmm1, %xmm2 + +// CHECK: vmovaps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x29,0x08] + vmovaps %xmm1, (%eax) + +// CHECK: vmovapd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0x10] + vmovapd (%eax), %xmm2 + +// CHECK: vmovapd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] + vmovapd %xmm1, %xmm2 + +// CHECK: vmovapd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x29,0x08] + vmovapd %xmm1, (%eax) + +// CHECK: vmovups (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0x10] + vmovups (%eax), %xmm2 + +// CHECK: vmovups %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] + vmovups %xmm1, %xmm2 + +// CHECK: vmovups %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x11,0x08] + vmovups %xmm1, (%eax) + +// CHECK: vmovupd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0x10] + vmovupd (%eax), %xmm2 + +// CHECK: vmovupd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0xd1] + vmovupd %xmm1, %xmm2 + +// CHECK: vmovupd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x11,0x08] + vmovupd %xmm1, (%eax) + +// CHECK: vmovlps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x13,0x08] + vmovlps %xmm1, (%eax) + +// CHECK: vmovlps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0x18] + vmovlps (%eax), %xmm2, %xmm3 + +// CHECK: vmovlpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x13,0x08] + vmovlpd %xmm1, (%eax) + +// CHECK: vmovlpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x12,0x18] + vmovlpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovhps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x17,0x08] + vmovhps %xmm1, (%eax) + +// CHECK: vmovhps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0x18] + vmovhps (%eax), %xmm2, %xmm3 + +// CHECK: vmovhpd 
%xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x17,0x08] + vmovhpd %xmm1, (%eax) + +// CHECK: vmovhpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x16,0x18] + vmovhpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] + vmovlhps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] + vmovhlps %xmm1, %xmm2, %xmm3 + +// CHECK: vcvtss2sil %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] + vcvtss2si %xmm1, %eax + +// CHECK: vcvtss2sil (%eax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%eax), %ebx + +// CHECK: vcvtdq2ps %xmm5, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] + vcvtdq2ps %xmm5, %xmm6 + +// CHECK: vcvtdq2ps (%eax), %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] + vcvtdq2ps (%eax), %xmm6 + +// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] + vcvtsd2ss %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] + vcvtsd2ss (%eax), %xmm4, %xmm6 + +// CHECK: vcvtps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0xda] + vcvtps2dq %xmm2, %xmm3 + +// CHECK: vcvtps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] + vcvtps2dq (%eax), %xmm3 + +// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] + vcvtss2sd %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0x30] + vcvtss2sd (%eax), %xmm4, %xmm6 + +// CHECK: vcvtdq2ps %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] + vcvtdq2ps %xmm4, %xmm6 + +// CHECK: vcvtdq2ps (%ecx), %xmm4 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] + vcvtdq2ps (%ecx), %xmm4 + +// CHECK: vcvttps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] + vcvttps2dq %xmm2, %xmm3 + +// CHECK: vcvttps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] + vcvttps2dq (%eax), %xmm3 + +// CHECK: vcvtps2pd %xmm2, %xmm3 +// 
CHECK: encoding: [0xc5,0xf8,0x5a,0xda] + vcvtps2pd %xmm2, %xmm3 + +// CHECK: vcvtps2pd (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] + vcvtps2pd (%eax), %xmm3 + +// CHECK: vcvtpd2ps %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] + vcvtpd2ps %xmm2, %xmm3 + +// CHECK: vsqrtpd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] + vsqrtpd %xmm1, %xmm2 + +// CHECK: vsqrtpd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0x10] + vsqrtpd (%eax), %xmm2 + +// CHECK: vsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] + vsqrtps %xmm1, %xmm2 + +// CHECK: vsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0x10] + vsqrtps (%eax), %xmm2 + +// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] + vsqrtsd %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0x18] + vsqrtsd (%eax), %xmm2, %xmm3 + +// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0xd9] + vsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0x18] + vsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] + vrsqrtps %xmm1, %xmm2 + +// CHECK: vrsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0x10] + vrsqrtps (%eax), %xmm2 + +// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0xd9] + vrsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0x18] + vrsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrcpps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] + vrcpps %xmm1, %xmm2 + +// CHECK: vrcpps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0x10] + vrcpps (%eax), %xmm2 + +// CHECK: vrcpss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0xd9] + vrcpss %xmm1, %xmm2, %xmm3 + +// CHECK: vrcpss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0x18] + vrcpss (%eax), %xmm2, 
%xmm3 + +// CHECK: vmovntdq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] + vmovntdq %xmm1, (%eax) + +// CHECK: vmovntpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] + vmovntpd %xmm1, (%eax) + +// CHECK: vmovntps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] + vmovntps %xmm1, (%eax) + +// CHECK: vldmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x10] + vldmxcsr (%eax) + +// CHECK: vstmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x18] + vstmxcsr (%eax) + +// CHECK: vldmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] + vldmxcsr 0xdeadbeef + +// CHECK: vstmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] + vstmxcsr 0xdeadbeef + +// CHECK: vpsubb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] + vpsubb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] + vpsubb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] + vpsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0x18] + vpsubw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] + vpsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] + vpsubd (%eax), %xmm2, %xmm3 + +// CHECK: vpsubq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] + vpsubq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] + vpsubq (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] + vpsubsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] + vpsubsb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] + vpsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: 
vpsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] + vpsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] + vpsubusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] + vpsubusb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] + vpsubusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] + vpsubusw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] + vpaddb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] + vpaddb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] + vpaddw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] + vpaddw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] + vpaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] + vpaddd (%eax), %xmm2, %xmm3 + +// CHECK: vpaddq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] + vpaddq %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] + vpaddq (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] + vpaddsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0x18] + vpaddsb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] + vpaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0x18] + vpaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xe9,0xdc,0xd9] + vpaddusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] + vpaddusb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] + vpaddusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] + vpaddusw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] + vpmulhuw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] + vpmulhuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] + vpmulhw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] + vpmulhw (%eax), %xmm2, %xmm3 + +// CHECK: vpmullw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] + vpmullw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmullw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] + vpmullw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuludq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] + vpmuludq %xmm1, %xmm2, %xmm3 + +// CHECK: vpmuludq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] + vpmuludq (%eax), %xmm2, %xmm3 + +// CHECK: vpavgb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] + vpavgb %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] + vpavgb (%eax), %xmm2, %xmm3 + +// CHECK: vpavgw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] + vpavgw %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] + vpavgw (%eax), %xmm2, %xmm3 + +// CHECK: vpminsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] + vpminsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpminsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0x18] + vpminsw 
(%eax), %xmm2, %xmm3 + +// CHECK: vpminub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] + vpminub %xmm1, %xmm2, %xmm3 + +// CHECK: vpminub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0x18] + vpminub (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] + vpmaxsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0x18] + vpmaxsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] + vpmaxub %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0x18] + vpmaxub (%eax), %xmm2, %xmm3 + +// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] + vpsadbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsadbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] + vpsadbw (%eax), %xmm2, %xmm3 + +// CHECK: vpsllw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] + vpsllw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] + vpsllw (%eax), %xmm2, %xmm3 + +// CHECK: vpslld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] + vpslld %xmm1, %xmm2, %xmm3 + +// CHECK: vpslld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] + vpslld (%eax), %xmm2, %xmm3 + +// CHECK: vpsllq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] + vpsllq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] + vpsllq (%eax), %xmm2, %xmm3 + +// CHECK: vpsraw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] + vpsraw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsraw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] + vpsraw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrad %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] + vpsrad %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrad (%eax), %xmm2, %xmm3 
+// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] + vpsrad (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] + vpsrlw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] + vpsrlw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] + vpsrld %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] + vpsrld (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] + vpsrlq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] + vpsrlq (%eax), %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpslldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] + vpslldq $10, %xmm2, %xmm3 + +// CHECK: vpsllq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] + vpsllq $10, %xmm2, %xmm3 + +// CHECK: vpsllw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] + vpsllw $10, %xmm2, %xmm3 + +// CHECK: vpsrad $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] + vpsrad $10, %xmm2, %xmm3 + +// CHECK: vpsraw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] + vpsraw $10, %xmm2, %xmm3 + +// CHECK: vpsrld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] + vpsrld $10, %xmm2, %xmm3 + +// CHECK: vpsrldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] + vpsrldq $10, %xmm2, %xmm3 + +// CHECK: vpsrlq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] + vpsrlq $10, %xmm2, %xmm3 + +// CHECK: vpsrlw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] + vpsrlw $10, %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + 
+// CHECK: vpand %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] + vpand %xmm1, %xmm2, %xmm3 + +// CHECK: vpand (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] + vpand (%eax), %xmm2, %xmm3 + +// CHECK: vpor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] + vpor %xmm1, %xmm2, %xmm3 + +// CHECK: vpor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] + vpor (%eax), %xmm2, %xmm3 + +// CHECK: vpxor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] + vpxor %xmm1, %xmm2, %xmm3 + +// CHECK: vpxor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0x18] + vpxor (%eax), %xmm2, %xmm3 + +// CHECK: vpandn %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] + vpandn %xmm1, %xmm2, %xmm3 + +// CHECK: vpandn (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] + vpandn (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] + vpcmpeqb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0x18] + vpcmpeqb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] + vpcmpeqw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0x18] + vpcmpeqw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] + vpcmpeqd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0x18] + vpcmpeqd (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] + vpcmpgtb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0x18] + vpcmpgtb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] + vpcmpgtw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 +// CHECK: 
encoding: [0xc5,0xe9,0x65,0x18] + vpcmpgtw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] + vpcmpgtd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0x18] + vpcmpgtd (%eax), %xmm2, %xmm3 + +// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] + vpacksswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpacksswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0x18] + vpacksswb (%eax), %xmm2, %xmm3 + +// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] + vpackssdw %xmm1, %xmm2, %xmm3 + +// CHECK: vpackssdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] + vpackssdw (%eax), %xmm2, %xmm3 + +// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] + vpackuswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpackuswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0x18] + vpackuswb (%eax), %xmm2, %xmm3 + +// CHECK: vpshufd $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] + vpshufd $4, %xmm2, %xmm3 + +// CHECK: vpshufd $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] + vpshufd $4, (%eax), %xmm3 + +// CHECK: vpshufhw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] + vpshufhw $4, %xmm2, %xmm3 + +// CHECK: vpshufhw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] + vpshufhw $4, (%eax), %xmm3 + +// CHECK: vpshuflw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] + vpshuflw $4, %xmm2, %xmm3 + +// CHECK: vpshuflw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] + vpshuflw $4, (%eax), %xmm3 + +// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] + vpunpcklbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0x18] + vpunpcklbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 +// 
CHECK: encoding: [0xc5,0xe9,0x61,0xd9] + vpunpcklwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0x18] + vpunpcklwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] + vpunpckldq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0x18] + vpunpckldq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] + vpunpcklqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] + vpunpcklqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] + vpunpckhbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0x18] + vpunpckhbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] + vpunpckhwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0x18] + vpunpckhwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] + vpunpckhdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] + vpunpckhdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] + vpunpckhqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] + vpunpckhqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm3 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm3 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + 
vpextrw $7, %xmm2, %eax + +// CHECK: vpmovmskb %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] + vpmovmskb %xmm1, %eax + +// CHECK: vmaskmovdqu %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] + vmaskmovdqu %xmm1, %xmm2 + +// CHECK: vmovd %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] + vmovd %xmm1, %eax + +// CHECK: vmovd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] + vmovd %xmm1, (%eax) + +// CHECK: vmovd %eax, %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] + vmovd %eax, %xmm1 + +// CHECK: vmovd (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] + vmovd (%eax), %xmm1 + +// CHECK: vmovq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] + vmovq %xmm1, (%eax) + +// CHECK: vmovq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] + vmovq %xmm1, %xmm2 + +// CHECK: vmovq (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] + vmovq (%eax), %xmm1 + +// CHECK: vcvtpd2dq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] + vcvtpd2dq %xmm1, %xmm2 + +// CHECK: vcvtdq2pd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] + vcvtdq2pd %xmm1, %xmm2 + +// CHECK: vcvtdq2pd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] + vcvtdq2pd (%eax), %xmm2 + +// CHECK: vmovshdup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] + vmovshdup %xmm1, %xmm2 + +// CHECK: vmovshdup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0x10] + vmovshdup (%eax), %xmm2 + +// CHECK: vmovsldup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] + vmovsldup %xmm1, %xmm2 + +// CHECK: vmovsldup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0x10] + vmovsldup (%eax), %xmm2 + +// CHECK: vmovddup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] + vmovddup %xmm1, %xmm2 + +// CHECK: vmovddup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0x10] + vmovddup (%eax), %xmm2 + +// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] + vaddsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubps (%eax), 
%xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] + vaddsubps (%eax), %xmm1, %xmm2 + +// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] + vaddsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] + vaddsubpd (%eax), %xmm1, %xmm2 + +// CHECK: vhaddps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] + vhaddps %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] + vhaddps (%eax), %xmm2, %xmm3 + +// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] + vhaddpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] + vhaddpd (%eax), %xmm2, %xmm3 + +// CHECK: vhsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] + vhsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] + vhsubps (%eax), %xmm2, %xmm3 + +// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] + vhsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] + vhsubpd (%eax), %xmm2, %xmm3 + +// CHECK: vpabsb %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] + vpabsb %xmm1, %xmm2 + +// CHECK: vpabsb (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] + vpabsb (%eax), %xmm2 + +// CHECK: vpabsw %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] + vpabsw %xmm1, %xmm2 + +// CHECK: vpabsw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] + vpabsw (%eax), %xmm2 + +// CHECK: vpabsd %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] + vpabsd %xmm1, %xmm2 + +// CHECK: vpabsd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] + vpabsd (%eax), %xmm2 + +// CHECK: vphaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] + vphaddw %xmm1, %xmm2, %xmm3 + +// 
CHECK: vphaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] + vphaddw (%eax), %xmm2, %xmm3 + +// CHECK: vphaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] + vphaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] + vphaddd (%eax), %xmm2, %xmm3 + +// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] + vphaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] + vphaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] + vphsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] + vphsubw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] + vphsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] + vphsubd (%eax), %xmm2, %xmm3 + +// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9] + vphsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] + vphsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] + vpmaddubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] + vpmaddubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpshufb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] + vpshufb %xmm1, %xmm2, %xmm3 + +// CHECK: vpshufb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] + vpshufb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] + vpsignb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignb (%eax), %xmm2, %xmm3 +// CHECK: encoding: 
[0xc4,0xe2,0x69,0x08,0x18] + vpsignb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] + vpsignw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] + vpsignw (%eax), %xmm2, %xmm3 + +// CHECK: vpsignd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] + vpsignd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] + vpsignd (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] + vpmulhrsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] + vpmulhrsw (%eax), %xmm2, %xmm3 + +// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] + vpalignr $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] + vpalignr $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07] + vroundsd $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] + vroundsd $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] + vroundss $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] + vroundss $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundpd $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] + vroundpd $7, %xmm2, %xmm3 + +// CHECK: vroundpd $7, (%eax), %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] + vroundpd $7, (%eax), %xmm3 + +// CHECK: vroundps $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] + vroundps $7, %xmm2, %xmm3 + +// CHECK: vroundps $7, (%eax), %xmm3 
+// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] + vroundps $7, (%eax), %xmm3 + +// CHECK: vphminposuw %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] + vphminposuw %xmm2, %xmm3 + +// CHECK: vphminposuw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] + vphminposuw (%eax), %xmm2 + +// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] + vpackusdw %xmm2, %xmm3, %xmm1 + +// CHECK: vpackusdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] + vpackusdw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] + vpcmpeqq %xmm2, %xmm3, %xmm1 + +// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] + vpcmpeqq (%eax), %xmm2, %xmm3 + +// CHECK: vpminsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] + vpminsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] + vpminsb (%eax), %xmm2, %xmm3 + +// CHECK: vpminsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca] + vpminsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] + vpminsd (%eax), %xmm2, %xmm3 + +// CHECK: vpminud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] + vpminud %xmm2, %xmm3, %xmm1 + +// CHECK: vpminud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] + vpminud (%eax), %xmm2, %xmm3 + +// CHECK: vpminuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] + vpminuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpminuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] + vpminuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] + vpmaxsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] + vpmaxsb (%eax), %xmm2, 
%xmm3 + +// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] + vpmaxsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] + vpmaxsd (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] + vpmaxud %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] + vpmaxud (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] + vpmaxuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] + vpmaxuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] + vpmuldq %xmm2, %xmm3, %xmm1 + +// CHECK: vpmuldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] + vpmuldq (%eax), %xmm2, %xmm3 + +// CHECK: vpmulld %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] + vpmulld %xmm2, %xmm5, %xmm1 + +// CHECK: vpmulld (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] + vpmulld (%eax), %xmm5, %xmm3 + +// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] + vblendps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] + vblendps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] + vblendpd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] + vblendpd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] + vpblendw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] + vpblendw $3, 
(%eax), %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] + vmpsadbw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] + vmpsadbw $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] + vdpps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] + vdpps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] + vdppd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] + vdppd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] + vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] + vblendvpd %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] + vblendvps %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] + vblendvps %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] + vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] + vpblendvb %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpmovsxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] + vpmovsxbw %xmm2, %xmm5 + +// CHECK: vpmovsxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] + vpmovsxbw (%eax), %xmm2 + +// CHECK: vpmovsxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] + vpmovsxwd %xmm2, %xmm5 + +// 
CHECK: vpmovsxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] + vpmovsxwd (%eax), %xmm2 + +// CHECK: vpmovsxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] + vpmovsxdq %xmm2, %xmm5 + +// CHECK: vpmovsxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] + vpmovsxdq (%eax), %xmm2 + +// CHECK: vpmovzxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] + vpmovzxbw %xmm2, %xmm5 + +// CHECK: vpmovzxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] + vpmovzxbw (%eax), %xmm2 + +// CHECK: vpmovzxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] + vpmovzxwd %xmm2, %xmm5 + +// CHECK: vpmovzxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] + vpmovzxwd (%eax), %xmm2 + +// CHECK: vpmovzxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] + vpmovzxdq %xmm2, %xmm5 + +// CHECK: vpmovzxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] + vpmovzxdq (%eax), %xmm2 + +// CHECK: vpmovsxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] + vpmovsxbq %xmm2, %xmm5 + +// CHECK: vpmovsxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] + vpmovsxbq (%eax), %xmm2 + +// CHECK: vpmovzxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] + vpmovzxbq %xmm2, %xmm5 + +// CHECK: vpmovzxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] + vpmovzxbq (%eax), %xmm2 + +// CHECK: vpmovsxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] + vpmovsxbd %xmm2, %xmm5 + +// CHECK: vpmovsxbd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] + vpmovsxbd (%eax), %xmm2 + +// CHECK: vpmovsxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] + vpmovsxwq %xmm2, %xmm5 + +// CHECK: vpmovsxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] + vpmovsxwq (%eax), %xmm2 + +// CHECK: vpmovzxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] + vpmovzxbd %xmm2, %xmm5 + +// CHECK: vpmovzxbd 
(%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] + vpmovzxbd (%eax), %xmm2 + +// CHECK: vpmovzxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] + vpmovzxwq %xmm2, %xmm5 + +// CHECK: vpmovzxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] + vpmovzxwq (%eax), %xmm2 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + vpextrw $7, %xmm2, %eax + +// CHECK: vpextrw $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] + vpextrw $7, %xmm2, (%eax) + +// CHECK: vpextrd $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] + vpextrd $7, %xmm2, %eax + +// CHECK: vpextrd $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] + vpextrd $7, %xmm2, (%eax) + +// CHECK: vpextrb $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] + vpextrb $7, %xmm2, %eax + +// CHECK: vpextrb $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] + vpextrb $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] + vextractps $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] + vextractps $7, %xmm2, %eax + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] + vpinsrb $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] + vpinsrb $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] + vpinsrd $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: 
[0xc4,0xe3,0x69,0x22,0x28,0x07] + vpinsrd $7, (%eax), %xmm2, %xmm5 + +// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] + vinsertps $7, %xmm2, %xmm5, %xmm1 + +// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] + vinsertps $7, (%eax), %xmm5, %xmm1 + +// CHECK: vptest %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] + vptest %xmm2, %xmm5 + +// CHECK: vptest (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] + vptest (%eax), %xmm2 + +// CHECK: vmovntdqa (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] + vmovntdqa (%eax), %xmm2 + +// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] + vpcmpgtq %xmm2, %xmm5, %xmm1 + +// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] + vpcmpgtq (%eax), %xmm5, %xmm3 + +// CHECK: vpcmpistrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] + vpcmpistrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpistrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] + vpcmpistrm $7, (%eax), %xmm5 + +// CHECK: vpcmpestrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] + vpcmpestrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpestrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] + vpcmpestrm $7, (%eax), %xmm5 + +// CHECK: vpcmpistri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] + vpcmpistri $7, %xmm2, %xmm5 + +// CHECK: vpcmpistri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] + vpcmpistri $7, (%eax), %xmm5 + +// CHECK: vpcmpestri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] + vpcmpestri $7, %xmm2, %xmm5 + +// CHECK: vpcmpestri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] + vpcmpestri $7, (%eax), %xmm5 + +// CHECK: vaesimc %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] + vaesimc 
%xmm2, %xmm5 + +// CHECK: vaesimc (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] + vaesimc (%eax), %xmm2 + +// CHECK: vaesenc %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] + vaesenc %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenc (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] + vaesenc (%eax), %xmm5, %xmm3 + +// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] + vaesenclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] + vaesenclast (%eax), %xmm5, %xmm3 + +// CHECK: vaesdec %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] + vaesdec %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdec (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] + vaesdec (%eax), %xmm5, %xmm3 + +// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] + vaesdeclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] + vaesdeclast (%eax), %xmm5, %xmm3 + +// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] + vaeskeygenassist $7, %xmm2, %xmm5 + +// CHECK: vaeskeygenassist $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] + vaeskeygenassist $7, (%eax), %xmm5 + +// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] + vcmpeq_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] + vcmpngeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] + vcmpngtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] + vcmpfalseps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] + vcmpneq_oqps 
%xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] + vcmpgeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] + vcmpgtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] + vcmptrueps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] + vcmpeq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] + vcmplt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] + vcmple_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] + vcmpunord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] + vcmpneq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15] + vcmpnlt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] + vcmpnle_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] + vcmpord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] + vcmpeq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] + vcmpnge_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] + vcmpngt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] + vcmpfalse_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xe8,0xc2,0xd9,0x1c] + vcmpneq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] + vcmpge_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] + vcmpgt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] + vcmptrue_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovaps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0x10] + vmovaps (%eax), %ymm2 + +// CHECK: vmovaps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] + vmovaps %ymm1, %ymm2 + +// CHECK: vmovaps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x29,0x08] + vmovaps %ymm1, (%eax) + +// CHECK: vmovapd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0x10] + vmovapd (%eax), %ymm2 + +// CHECK: vmovapd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] + vmovapd %ymm1, %ymm2 + +// CHECK: vmovapd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x29,0x08] + vmovapd %ymm1, (%eax) + +// CHECK: vmovups (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0x10] + vmovups (%eax), %ymm2 + +// CHECK: vmovups %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] + vmovups %ymm1, %ymm2 + +// CHECK: vmovups %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x11,0x08] + vmovups %ymm1, (%eax) + +// CHECK: vmovupd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0x10] + vmovupd (%eax), %ymm2 + +// CHECK: vmovupd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] + vmovupd %ymm1, %ymm2 + +// CHECK: vmovupd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x11,0x08] + vmovupd %ymm1, (%eax) + +// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x15,0xe1] + vunpckhps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x15,0xe1] + vunpckhpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x14,0xe1] + vunpcklps 
%ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x14,0xe1] + vunpcklpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vmovntdq %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] + vmovntdq %ymm1, (%eax) + +// CHECK: vmovntpd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] + vmovntpd %ymm1, (%eax) + +// CHECK: vmovntps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] + vmovntps %ymm1, (%eax) + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vmaxps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] + vmaxps %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] + vmaxpd %ymm2, %ymm4, %ymm6 + +// CHECK: vminps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] + vminps %ymm2, %ymm4, %ymm6 + +// CHECK: vminpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] + vminpd %ymm2, %ymm4, %ymm6 + +// CHECK: vsubps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] + vsubps %ymm2, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] + vsubpd %ymm2, %ymm4, %ymm6 + +// CHECK: vdivps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: 
[0xc5,0xdc,0x5e,0xf2] + vdivps %ymm2, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] + vdivpd %ymm2, %ymm4, %ymm6 + +// CHECK: vaddps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] + vaddps %ymm2, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] + vaddpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmulps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] + vmulps %ymm2, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] + vmulpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%eax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%eax), %ymm4, %ymm6 + +// CHECK: vminps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%eax), %ymm4, %ymm6 + +// CHECK: vminpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%eax), %ymm4, %ymm6 + +// CHECK: vsubps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%eax), %ymm4, %ymm6 + +// CHECK: vsubpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%eax), %ymm4, %ymm6 + +// CHECK: vdivps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%eax), %ymm4, %ymm6 + +// CHECK: vdivpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] + vdivpd (%eax), %ymm4, %ymm6 + +// CHECK: vaddps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%eax), %ymm4, %ymm6 + +// CHECK: vaddpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%eax), %ymm4, %ymm6 + +// CHECK: vmulps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%eax), %ymm4, %ymm6 + +// CHECK: vmulpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%eax), %ymm4, %ymm6 + +// CHECK: vsqrtpd 
%ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] + vsqrtpd %ymm1, %ymm2 + +// CHECK: vsqrtpd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0x10] + vsqrtpd (%eax), %ymm2 + +// CHECK: vsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] + vsqrtps %ymm1, %ymm2 + +// CHECK: vsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0x10] + vsqrtps (%eax), %ymm2 + +// CHECK: vrsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] + vrsqrtps %ymm1, %ymm2 + +// CHECK: vrsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0x10] + vrsqrtps (%eax), %ymm2 + +// CHECK: vrcpps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] + vrcpps %ymm1, %ymm2 + +// CHECK: vrcpps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0x10] + vrcpps (%eax), %ymm2 + +// CHECK: vandps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] + vandps %ymm2, %ymm4, %ymm6 + +// CHECK: vandpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] + vandpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] + vorps %ymm2, %ymm4, %ymm6 + +// CHECK: vorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] + vorpd %ymm2, %ymm4, %ymm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] + vxorps %ymm2, %ymm4, %ymm6 + +// CHECK: vxorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] + vxorpd 
%ymm2, %ymm4, %ymm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] + vandnps %ymm2, %ymm4, %ymm6 + +// CHECK: vandnpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] + vandnpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vcvtps2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] + vcvtps2pd %xmm3, %ymm2 + +// CHECK: vcvtps2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] + vcvtps2pd (%eax), %ymm2 + +// CHECK: vcvtdq2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3] + vcvtdq2pd %xmm3, %ymm2 + +// CHECK: vcvtdq2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] + vcvtdq2pd (%eax), %ymm2 + +// CHECK: vcvtdq2ps %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] + vcvtdq2ps %ymm2, %ymm5 + +// CHECK: vcvtdq2ps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] + vcvtdq2ps (%eax), %ymm2 + +// CHECK: vcvtps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] + vcvtps2dq %ymm2, %ymm5 + +// CHECK: vcvtps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] + vcvtps2dq (%eax), %ymm5 + +// CHECK: vcvttps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] + vcvttps2dq %ymm2, %ymm5 + +// CHECK: vcvttps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] + vcvttps2dq (%eax), %ymm5 + +// CHECK: vcvttpd2dq %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dq %xmm1, %xmm5 + +// CHECK: 
vcvttpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] + vcvttpd2dq %ymm2, %xmm5 + +// CHECK: vcvttpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dqx %xmm1, %xmm5 + +// CHECK: vcvttpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] + vcvttpd2dqx (%eax), %xmm1 + +// CHECK: vcvttpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] + vcvttpd2dqy %ymm2, %xmm1 + +// CHECK: vcvttpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] + vcvttpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2ps %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] + vcvtpd2ps %ymm2, %xmm5 + +// CHECK: vcvtpd2psx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] + vcvtpd2psx %xmm1, %xmm5 + +// CHECK: vcvtpd2psx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] + vcvtpd2psx (%eax), %xmm1 + +// CHECK: vcvtpd2psy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] + vcvtpd2psy %ymm2, %xmm1 + +// CHECK: vcvtpd2psy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] + vcvtpd2psy (%eax), %xmm1 + +// CHECK: vcvtpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xff,0xe6,0xea] + vcvtpd2dq %ymm2, %xmm5 + +// CHECK: vcvtpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0xca] + vcvtpd2dqy %ymm2, %xmm1 + +// CHECK: vcvtpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0x08] + vcvtpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] + vcvtpd2dqx %xmm1, %xmm5 + +// CHECK: vcvtpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] + vcvtpd2dqx (%eax), %xmm1 + +// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] + vcmpeqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] + vcmpleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] + vcmpltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $4, %ymm1, %ymm2, 
%ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] + vcmpneqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] + vcmpnleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] + vcmpnltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] + vcmpordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] + vcmpunordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] + vcmpeqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 +// CHECK: 
encoding: [0xc5,0xed,0xc2,0xd9,0x02] + vcmplepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] + vcmpltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] + vcmpneqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] + vcmpnlepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] + vcmpnltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] + vcmpordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] + vcmpunordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] 
+ vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] + vcmpeq_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] + vcmpngeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] + vcmpngtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] + vcmpfalseps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] + vcmpneq_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] + vcmpgeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] + vcmpgtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] + vcmptrueps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] + vcmpeq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] + vcmplt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] + vcmple_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] + vcmpunord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] + vcmpneq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] + vcmpnlt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] + vcmpnle_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 +// CHECK: 
encoding: [0xc5,0xec,0xc2,0xd9,0x17] + vcmpord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] + vcmpeq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] + vcmpnge_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] + vcmpngt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] + vcmpfalse_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] + vcmpneq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] + vcmpge_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] + vcmpgt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] + vcmptrue_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0xd0,0xd9] + vaddsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf7,0xd0,0x10] + vaddsubps (%eax), %ymm1, %ymm2 + +// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xd0,0xd9] + vaddsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubpd (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf5,0xd0,0x10] + vaddsubpd (%eax), %ymm1, %ymm2 + +// CHECK: vhaddps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0xd9] + vhaddps %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0x18] + vhaddps (%eax), %ymm2, %ymm3 + +// CHECK: vhaddpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0xd9] + vhaddpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0x18] 
+ vhaddpd (%eax), %ymm2, %ymm3 + +// CHECK: vhsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0xd9] + vhsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0x18] + vhsubps (%eax), %ymm2, %ymm3 + +// CHECK: vhsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0xd9] + vhsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0x18] + vhsubpd (%eax), %ymm2, %ymm3 + +// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03] + vblendps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03] + vblendps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03] + vblendpd $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03] + vblendpd $3, (%eax), %ymm5, %ymm1 + +// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03] + vdpps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vdpps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03] + vdpps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vbroadcastf128 (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10] + vbroadcastf128 (%eax), %ymm2 + +// CHECK: vbroadcastsd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10] + vbroadcastsd (%eax), %ymm2 + +// CHECK: vbroadcastss (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10] + vbroadcastss (%eax), %xmm2 + +// CHECK: vbroadcastss (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10] + vbroadcastss (%eax), %ymm2 + +// CHECK: vinsertf128 $7, %xmm2, %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07] + vinsertf128 $7, %xmm2, %ymm2, %ymm5 + +// CHECK: vinsertf128 $7, (%eax), %ymm2, %ymm5 +// CHECK: encoding: 
[0xc4,0xe3,0x6d,0x18,0x28,0x07] + vinsertf128 $7, (%eax), %ymm2, %ymm5 + +// CHECK: vextractf128 $7, %ymm2, %xmm2 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07] + vextractf128 $7, %ymm2, %xmm2 + +// CHECK: vextractf128 $7, %ymm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07] + vextractf128 $7, %ymm2, (%eax) + +// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10] + vmaskmovpd %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10] + vmaskmovpd %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28] + vmaskmovpd (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28] + vmaskmovpd (%eax), %ymm2, %ymm5 + +// CHECK: vmaskmovps %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10] + vmaskmovps %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovps %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10] + vmaskmovps %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovps (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28] + vmaskmovps (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovps (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28] + vmaskmovps (%eax), %ymm2, %ymm5 + +// CHECK: vpermilps $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07] + vpermilps $7, %xmm1, %xmm5 + +// CHECK: vpermilps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07] + vpermilps $7, %ymm5, %ymm1 + +// CHECK: vpermilps $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07] + vpermilps $7, (%eax), %xmm5 + +// CHECK: vpermilps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07] + vpermilps $7, (%eax), %ymm5 + +// CHECK: vpermilps %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9] + vpermilps %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilps %ymm1, %ymm5, 
%ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9] + vpermilps %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilps (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18] + vpermilps (%eax), %xmm5, %xmm3 + +// CHECK: vpermilps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08] + vpermilps (%eax), %ymm5, %ymm1 + +// CHECK: vpermilpd $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07] + vpermilpd $7, %xmm1, %xmm5 + +// CHECK: vpermilpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07] + vpermilpd $7, %ymm5, %ymm1 + +// CHECK: vpermilpd $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07] + vpermilpd $7, (%eax), %xmm5 + +// CHECK: vpermilpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07] + vpermilpd $7, (%eax), %ymm5 + +// CHECK: vpermilpd %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9] + vpermilpd %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilpd %ymm1, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9] + vpermilpd %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilpd (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18] + vpermilpd (%eax), %xmm5, %xmm3 + +// CHECK: vpermilpd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08] + vpermilpd (%eax), %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07] + vperm2f128 $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07] + vperm2f128 $7, (%eax), %ymm5, %ymm1 + +// CHECK: vzeroall +// CHECK: encoding: [0xc5,0xfc,0x77] + vzeroall + +// CHECK: vzeroupper +// CHECK: encoding: [0xc5,0xf8,0x77] + vzeroupper + +// CHECK: vcvtsd2si %xmm4, %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc] + vcvtsd2si %xmm4, %ecx + +// CHECK: vcvtsd2si (%ecx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%ecx), %ecx + +// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7 
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00] + vcvtsi2sdl (%ebp), %xmm0, %xmm7 + +// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7 +// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24] + vcvtsi2sdl (%esp), %xmm0, %xmm7 + +// CHECK: vlddqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0xf0,0x10] + vlddqu (%eax), %ymm2 + +// CHECK: vmovddup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xff,0x12,0xea] + vmovddup %ymm2, %ymm5 + +// CHECK: vmovddup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0x12,0x10] + vmovddup (%eax), %ymm2 + +// CHECK: vmovdqa %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x6f,0xea] + vmovdqa %ymm2, %ymm5 + +// CHECK: vmovdqa %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x7f,0x10] + vmovdqa %ymm2, (%eax) + +// CHECK: vmovdqa (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x6f,0x10] + vmovdqa (%eax), %ymm2 + +// CHECK: vmovdqu %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x6f,0xea] + vmovdqu %ymm2, %ymm5 + +// CHECK: vmovdqu %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfe,0x7f,0x10] + vmovdqu %ymm2, (%eax) + +// CHECK: vmovdqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x6f,0x10] + vmovdqu (%eax), %ymm2 + +// CHECK: vmovshdup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x16,0xea] + vmovshdup %ymm2, %ymm5 + +// CHECK: vmovshdup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x16,0x10] + vmovshdup (%eax), %ymm2 + +// CHECK: vmovsldup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x12,0xea] + vmovsldup %ymm2, %ymm5 + +// CHECK: vmovsldup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x12,0x10] + vmovsldup (%eax), %ymm2 + +// CHECK: vptest %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea] + vptest %ymm2, %ymm5 + +// CHECK: vptest (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10] + vptest (%eax), %ymm2 + +// CHECK: vroundpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07] + vroundpd $7, %ymm5, %ymm1 + +// CHECK: vroundpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07] + vroundpd $7, (%eax), %ymm5 + +// 
CHECK: vroundps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07] + vroundps $7, %ymm5, %ymm1 + +// CHECK: vroundps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07] + vroundps $7, (%eax), %ymm5 + +// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07] + vshufpd $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufpd $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07] + vshufpd $7, (%eax), %ymm5, %ymm1 + +// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07] + vshufps $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufps $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07] + vshufps $7, (%eax), %ymm5, %ymm1 + +// CHECK: vtestpd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea] + vtestpd %xmm2, %xmm5 + +// CHECK: vtestpd %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea] + vtestpd %ymm2, %ymm5 + +// CHECK: vtestpd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10] + vtestpd (%eax), %xmm2 + +// CHECK: vtestpd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10] + vtestpd (%eax), %ymm2 + +// CHECK: vtestps %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea] + vtestps %xmm2, %xmm5 + +// CHECK: vtestps %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea] + vtestps %ymm2, %ymm5 + +// CHECK: vtestps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10] + vtestps (%eax), %xmm2 + +// CHECK: vtestps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10] + vtestps (%eax), %ymm2 + +// CHECK: vblendvpd %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2 +// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00] + vblendvpd %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2 + diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index ebafb11061ef6..ef774239ffe84 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ 
b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10047,2882 +10047,23 @@ // CHECK: encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00] ficomps 32493 -// CHECK: vaddss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x58,0xd4] - vaddss %xmm4, %xmm6, %xmm2 +// CHECK: movl 57005(,%eiz), %ebx +// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movl 57005(,%eiz), %ebx -// CHECK: vmulss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x59,0xd4] - vmulss %xmm4, %xmm6, %xmm2 +// CHECK: movl 48879(,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movl 48879(,%eiz), %eax -// CHECK: vsubss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5c,0xd4] - vsubss %xmm4, %xmm6, %xmm2 +// CHECK: movl -4(,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movl -4(,%eiz,8), %eax -// CHECK: vdivss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] - vdivss %xmm4, %xmm6, %xmm2 +// CHECK: movl (%ecx,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x21] + movl (%ecx,%eiz), %eax -// CHECK: vaddsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] - vaddsd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] - vmulsd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] - vsubsd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] - vdivsd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// 
CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] - vaddps %xmm4, %xmm6, %xmm2 - -// CHECK: vsubps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] - vsubps %xmm4, %xmm6, %xmm2 - -// CHECK: vmulps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] - vmulps %xmm4, %xmm6, %xmm2 - -// CHECK: vdivps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] - vdivps %xmm4, %xmm6, %xmm2 - -// CHECK: vaddpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] - vaddpd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] - vsubpd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] - vmulpd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] - vdivpd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: 
encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: vmaxss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] - vmaxss %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] - vmaxsd %xmm2, %xmm4, %xmm6 - -// CHECK: vminss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] - vminss %xmm2, %xmm4, %xmm6 - -// CHECK: vminsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] - vminsd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] - vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] - vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] - vminss 
-4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] - vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] - vmaxps %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] - vmaxpd %xmm2, %xmm4, %xmm6 - -// CHECK: vminps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] - vminps %xmm2, %xmm4, %xmm6 - -// CHECK: vminpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] - vminpd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] - vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] - vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] - vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] - vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] - vandps %xmm2, %xmm4, %xmm6 - -// CHECK: vandpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] - vandpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] - vorps %xmm2, %xmm4, %xmm6 - -// CHECK: vorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] - vorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: 
[0xc5,0xe8,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] - vxorps %xmm2, %xmm4, %xmm6 - -// CHECK: vxorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] - vxorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] - vandnps %xmm2, %xmm4, %xmm6 - -// CHECK: vandnpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] - vandnpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] - vmovss -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovss %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x10,0xec] - vmovss %xmm4, %xmm2, %xmm5 - -// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] - vmovsd -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovsd %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x10,0xec] - vmovsd %xmm4, %xmm2, %xmm5 - -// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] - vunpckhps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] - vunpckhpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 -// 
CHECK: encoding: [0xc5,0xe8,0x14,0xe1] - vunpcklps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] - vunpcklpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00] - vcmpps $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] - vcmpps $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] - vcmpps $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] - vcmppd $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] - vcmppd $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] - vcmppd $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] - vshufps $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] - vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] - vshufpd $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, 
%xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] - vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] - vcmpeqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] - vcmpleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] - vcmpltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] - vcmpneqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] - vcmpnleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] - vcmpnltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] - vcmpordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] - vcmpunordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps 
-4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] - vcmpeqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] - vcmplepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] - vcmpltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] - vcmpneqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] - vcmpnlepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] - vcmpnltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] - vcmpordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] - vcmpunordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] - vcmpeqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] - vcmpless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] - vcmpltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] - vcmpneqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] - vcmpnless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] - vcmpnltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] - vcmpordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] - vcmpunordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: 
[0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] - vcmpeqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] - vcmplesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] - vcmpltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] - vcmpneqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] - vcmpnlesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] - vcmpnltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] - vcmpordsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] - vcmpunordsd %xmm1, %xmm2, %xmm3 - -// 
CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vucomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] - vucomiss %xmm1, %xmm2 - -// CHECK: vucomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] - vucomiss (%eax), %xmm2 - -// CHECK: vcomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] - vcomiss %xmm1, %xmm2 - -// CHECK: vcomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] - vcomiss (%eax), %xmm2 - -// CHECK: vucomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] - vucomisd %xmm1, %xmm2 - -// CHECK: vucomisd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] - vucomisd (%eax), %xmm2 - -// CHECK: vcomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] - vcomisd %xmm1, %xmm2 - -// CHECK: vcomisd (%eax), %xmm2 -// CHECK: encoding: 
[0xc5,0xf9,0x2f,0x10] - vcomisd (%eax), %xmm2 - -// CHECK: vcvttss2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] - vcvttss2si %xmm1, %eax - -// CHECK: vcvttss2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%ecx), %eax - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvttsd2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] - vcvttsd2si %xmm1, %eax - -// CHECK: vcvttsd2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%ecx), %eax - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vmovaps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0x10] - vmovaps (%eax), %xmm2 - -// CHECK: vmovaps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] - vmovaps %xmm1, %xmm2 - -// CHECK: vmovaps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x29,0x08] - vmovaps %xmm1, (%eax) - -// CHECK: vmovapd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0x10] - vmovapd (%eax), %xmm2 - -// CHECK: vmovapd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] - vmovapd %xmm1, %xmm2 - -// CHECK: vmovapd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x29,0x08] - vmovapd %xmm1, (%eax) - -// CHECK: vmovups (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0x10] - vmovups (%eax), %xmm2 - -// CHECK: vmovups %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] - vmovups %xmm1, %xmm2 - -// CHECK: vmovups %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x11,0x08] - vmovups %xmm1, (%eax) - -// CHECK: vmovupd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x10,0x10] - vmovupd (%eax), %xmm2 - -// CHECK: vmovupd %xmm1, %xmm2 -// 
CHECK: encoding: [0xc5,0xf9,0x10,0xd1] - vmovupd %xmm1, %xmm2 - -// CHECK: vmovupd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x11,0x08] - vmovupd %xmm1, (%eax) - -// CHECK: vmovlps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x13,0x08] - vmovlps %xmm1, (%eax) - -// CHECK: vmovlps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0x18] - vmovlps (%eax), %xmm2, %xmm3 - -// CHECK: vmovlpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x13,0x08] - vmovlpd %xmm1, (%eax) - -// CHECK: vmovlpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x12,0x18] - vmovlpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovhps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x17,0x08] - vmovhps %xmm1, (%eax) - -// CHECK: vmovhps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0x18] - vmovhps (%eax), %xmm2, %xmm3 - -// CHECK: vmovhpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x17,0x08] - vmovhpd %xmm1, (%eax) - -// CHECK: vmovhpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x16,0x18] - vmovhpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] - vmovlhps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] - vmovhlps %xmm1, %xmm2, %xmm3 - -// CHECK: vcvtss2sil %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] - vcvtss2si %xmm1, %eax - -// CHECK: vcvtss2sil (%eax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%eax), %ebx - -// CHECK: vcvtdq2ps %xmm5, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] - vcvtdq2ps %xmm5, %xmm6 - -// CHECK: vcvtdq2ps (%eax), %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] - vcvtdq2ps (%eax), %xmm6 - -// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] - vcvtsd2ss %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] - vcvtsd2ss (%eax), %xmm4, %xmm6 - -// CHECK: vcvtps2dq %xmm2, %xmm3 -// CHECK: encoding: 
[0xc5,0xf9,0x5b,0xda] - vcvtps2dq %xmm2, %xmm3 - -// CHECK: vcvtps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] - vcvtps2dq (%eax), %xmm3 - -// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] - vcvtss2sd %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0x30] - vcvtss2sd (%eax), %xmm4, %xmm6 - -// CHECK: vcvtdq2ps %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] - vcvtdq2ps %xmm4, %xmm6 - -// CHECK: vcvtdq2ps (%ecx), %xmm4 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] - vcvtdq2ps (%ecx), %xmm4 - -// CHECK: vcvttps2dq %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] - vcvttps2dq %xmm2, %xmm3 - -// CHECK: vcvttps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] - vcvttps2dq (%eax), %xmm3 - -// CHECK: vcvtps2pd %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0xda] - vcvtps2pd %xmm2, %xmm3 - -// CHECK: vcvtps2pd (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] - vcvtps2pd (%eax), %xmm3 - -// CHECK: vcvtpd2ps %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] - vcvtpd2ps %xmm2, %xmm3 - -// CHECK: vsqrtpd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] - vsqrtpd %xmm1, %xmm2 - -// CHECK: vsqrtpd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0x10] - vsqrtpd (%eax), %xmm2 - -// CHECK: vsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] - vsqrtps %xmm1, %xmm2 - -// CHECK: vsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0x10] - vsqrtps (%eax), %xmm2 - -// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] - vsqrtsd %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0x18] - vsqrtsd (%eax), %xmm2, %xmm3 - -// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0xd9] - vsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0x18] - vsqrtss (%eax), %xmm2, %xmm3 - -// 
CHECK: vrsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] - vrsqrtps %xmm1, %xmm2 - -// CHECK: vrsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0x10] - vrsqrtps (%eax), %xmm2 - -// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0xd9] - vrsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0x18] - vrsqrtss (%eax), %xmm2, %xmm3 - -// CHECK: vrcpps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] - vrcpps %xmm1, %xmm2 - -// CHECK: vrcpps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0x10] - vrcpps (%eax), %xmm2 - -// CHECK: vrcpss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0xd9] - vrcpss %xmm1, %xmm2, %xmm3 - -// CHECK: vrcpss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0x18] - vrcpss (%eax), %xmm2, %xmm3 - -// CHECK: vmovntdq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] - vmovntdq %xmm1, (%eax) - -// CHECK: vmovntpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] - vmovntpd %xmm1, (%eax) - -// CHECK: vmovntps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] - vmovntps %xmm1, (%eax) - -// CHECK: vldmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x10] - vldmxcsr (%eax) - -// CHECK: vstmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x18] - vstmxcsr (%eax) - -// CHECK: vldmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] - vldmxcsr 0xdeadbeef - -// CHECK: vstmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] - vstmxcsr 0xdeadbeef - -// CHECK: vpsubb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] - vpsubb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] - vpsubb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] - vpsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: 
[0xc5,0xe9,0xf9,0x18] - vpsubw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] - vpsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] - vpsubd (%eax), %xmm2, %xmm3 - -// CHECK: vpsubq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] - vpsubq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] - vpsubq (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] - vpsubsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] - vpsubsb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] - vpsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] - vpsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] - vpsubusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] - vpsubusb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] - vpsubusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] - vpsubusw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] - vpaddb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] - vpaddb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] - vpaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] - vpaddw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] - vpaddd %xmm1, %xmm2, %xmm3 - 
-// CHECK: vpaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] - vpaddd (%eax), %xmm2, %xmm3 - -// CHECK: vpaddq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] - vpaddq %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] - vpaddq (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] - vpaddsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0x18] - vpaddsb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] - vpaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0x18] - vpaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9] - vpaddusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] - vpaddusb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] - vpaddusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] - vpaddusw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] - vpmulhuw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] - vpmulhuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] - vpmulhw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] - vpmulhw (%eax), %xmm2, %xmm3 - -// CHECK: vpmullw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] - vpmullw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmullw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] - vpmullw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuludq %xmm1, %xmm2, 
%xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] - vpmuludq %xmm1, %xmm2, %xmm3 - -// CHECK: vpmuludq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] - vpmuludq (%eax), %xmm2, %xmm3 - -// CHECK: vpavgb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] - vpavgb %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] - vpavgb (%eax), %xmm2, %xmm3 - -// CHECK: vpavgw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] - vpavgw %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] - vpavgw (%eax), %xmm2, %xmm3 - -// CHECK: vpminsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] - vpminsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpminsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0x18] - vpminsw (%eax), %xmm2, %xmm3 - -// CHECK: vpminub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] - vpminub %xmm1, %xmm2, %xmm3 - -// CHECK: vpminub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0x18] - vpminub (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] - vpmaxsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0x18] - vpmaxsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] - vpmaxub %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0x18] - vpmaxub (%eax), %xmm2, %xmm3 - -// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] - vpsadbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsadbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] - vpsadbw (%eax), %xmm2, %xmm3 - -// CHECK: vpsllw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] - vpsllw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] - 
vpsllw (%eax), %xmm2, %xmm3 - -// CHECK: vpslld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] - vpslld %xmm1, %xmm2, %xmm3 - -// CHECK: vpslld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] - vpslld (%eax), %xmm2, %xmm3 - -// CHECK: vpsllq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] - vpsllq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] - vpsllq (%eax), %xmm2, %xmm3 - -// CHECK: vpsraw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] - vpsraw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsraw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] - vpsraw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrad %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] - vpsrad %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrad (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] - vpsrad (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] - vpsrlw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] - vpsrlw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] - vpsrld %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] - vpsrld (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] - vpsrlq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] - vpsrlq (%eax), %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpslldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] - vpslldq $10, %xmm2, %xmm3 - -// CHECK: vpsllq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] - vpsllq $10, %xmm2, %xmm3 - -// CHECK: vpsllw $10, %xmm2, %xmm3 -// CHECK: 
encoding: [0xc5,0xe1,0x71,0xf2,0x0a] - vpsllw $10, %xmm2, %xmm3 - -// CHECK: vpsrad $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] - vpsrad $10, %xmm2, %xmm3 - -// CHECK: vpsraw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] - vpsraw $10, %xmm2, %xmm3 - -// CHECK: vpsrld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] - vpsrld $10, %xmm2, %xmm3 - -// CHECK: vpsrldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] - vpsrldq $10, %xmm2, %xmm3 - -// CHECK: vpsrlq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] - vpsrlq $10, %xmm2, %xmm3 - -// CHECK: vpsrlw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] - vpsrlw $10, %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpand %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] - vpand %xmm1, %xmm2, %xmm3 - -// CHECK: vpand (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] - vpand (%eax), %xmm2, %xmm3 - -// CHECK: vpor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] - vpor %xmm1, %xmm2, %xmm3 - -// CHECK: vpor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] - vpor (%eax), %xmm2, %xmm3 - -// CHECK: vpxor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] - vpxor %xmm1, %xmm2, %xmm3 - -// CHECK: vpxor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0x18] - vpxor (%eax), %xmm2, %xmm3 - -// CHECK: vpandn %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] - vpandn %xmm1, %xmm2, %xmm3 - -// CHECK: vpandn (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] - vpandn (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] - vpcmpeqb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0x18] - vpcmpeqb (%eax), %xmm2, %xmm3 - -// CHECK: 
vpcmpeqw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] - vpcmpeqw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0x18] - vpcmpeqw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] - vpcmpeqd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0x18] - vpcmpeqd (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] - vpcmpgtb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0x18] - vpcmpgtb (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] - vpcmpgtw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0x18] - vpcmpgtw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] - vpcmpgtd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0x18] - vpcmpgtd (%eax), %xmm2, %xmm3 - -// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] - vpacksswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpacksswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0x18] - vpacksswb (%eax), %xmm2, %xmm3 - -// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] - vpackssdw %xmm1, %xmm2, %xmm3 - -// CHECK: vpackssdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] - vpackssdw (%eax), %xmm2, %xmm3 - -// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] - vpackuswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpackuswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0x18] - vpackuswb (%eax), %xmm2, %xmm3 - -// CHECK: vpshufd $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] - vpshufd $4, %xmm2, %xmm3 - -// CHECK: 
vpshufd $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] - vpshufd $4, (%eax), %xmm3 - -// CHECK: vpshufhw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] - vpshufhw $4, %xmm2, %xmm3 - -// CHECK: vpshufhw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] - vpshufhw $4, (%eax), %xmm3 - -// CHECK: vpshuflw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] - vpshuflw $4, %xmm2, %xmm3 - -// CHECK: vpshuflw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] - vpshuflw $4, (%eax), %xmm3 - -// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] - vpunpcklbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0x18] - vpunpcklbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] - vpunpcklwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0x18] - vpunpcklwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] - vpunpckldq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0x18] - vpunpckldq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] - vpunpcklqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] - vpunpcklqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] - vpunpckhbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0x18] - vpunpckhbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] - vpunpckhwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0x18] - 
vpunpckhwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] - vpunpckhdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] - vpunpckhdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] - vpunpckhqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] - vpunpckhqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm3 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm3 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpmovmskb %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] - vpmovmskb %xmm1, %eax - -// CHECK: vmaskmovdqu %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] - vmaskmovdqu %xmm1, %xmm2 - -// CHECK: vmovd %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] - vmovd %xmm1, %eax - -// CHECK: vmovd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] - vmovd %xmm1, (%eax) - -// CHECK: vmovd %eax, %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] - vmovd %eax, %xmm1 - -// CHECK: vmovd (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] - vmovd (%eax), %xmm1 - -// CHECK: vmovq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] - vmovq %xmm1, (%eax) - -// CHECK: vmovq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] - vmovq %xmm1, %xmm2 - -// CHECK: vmovq (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] - vmovq (%eax), %xmm1 - -// CHECK: vcvtpd2dq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] - vcvtpd2dq %xmm1, %xmm2 - -// CHECK: vcvtdq2pd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] - vcvtdq2pd %xmm1, %xmm2 - -// CHECK: 
vcvtdq2pd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] - vcvtdq2pd (%eax), %xmm2 - -// CHECK: vmovshdup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] - vmovshdup %xmm1, %xmm2 - -// CHECK: vmovshdup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0x10] - vmovshdup (%eax), %xmm2 - -// CHECK: vmovsldup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] - vmovsldup %xmm1, %xmm2 - -// CHECK: vmovsldup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0x10] - vmovsldup (%eax), %xmm2 - -// CHECK: vmovddup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] - vmovddup %xmm1, %xmm2 - -// CHECK: vmovddup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0x10] - vmovddup (%eax), %xmm2 - -// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] - vaddsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubps (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] - vaddsubps (%eax), %xmm1, %xmm2 - -// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] - vaddsubpd %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] - vaddsubpd (%eax), %xmm1, %xmm2 - -// CHECK: vhaddps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] - vhaddps %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] - vhaddps (%eax), %xmm2, %xmm3 - -// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] - vhaddpd %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] - vhaddpd (%eax), %xmm2, %xmm3 - -// CHECK: vhsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] - vhsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vhsubps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] - vhsubps (%eax), %xmm2, %xmm3 - -// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] - vhsubpd %xmm1, %xmm2, %xmm3 
- -// CHECK: vhsubpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] - vhsubpd (%eax), %xmm2, %xmm3 - -// CHECK: vpabsb %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] - vpabsb %xmm1, %xmm2 - -// CHECK: vpabsb (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] - vpabsb (%eax), %xmm2 - -// CHECK: vpabsw %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] - vpabsw %xmm1, %xmm2 - -// CHECK: vpabsw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] - vpabsw (%eax), %xmm2 - -// CHECK: vpabsd %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] - vpabsd %xmm1, %xmm2 - -// CHECK: vpabsd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] - vpabsd (%eax), %xmm2 - -// CHECK: vphaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] - vphaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] - vphaddw (%eax), %xmm2, %xmm3 - -// CHECK: vphaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] - vphaddd %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] - vphaddd (%eax), %xmm2, %xmm3 - -// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] - vphaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] - vphaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] - vphsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] - vphsubw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] - vphsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] - vphsubd (%eax), %xmm2, %xmm3 - -// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 -// CHECK: 
encoding: [0xc4,0xe2,0x69,0x07,0xd9] - vphsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] - vphsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] - vpmaddubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] - vpmaddubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpshufb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] - vpshufb %xmm1, %xmm2, %xmm3 - -// CHECK: vpshufb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] - vpshufb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] - vpsignb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18] - vpsignb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] - vpsignw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] - vpsignw (%eax), %xmm2, %xmm3 - -// CHECK: vpsignd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] - vpsignd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] - vpsignd (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] - vpmulhrsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] - vpmulhrsw (%eax), %xmm2, %xmm3 - -// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] - vpalignr $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] - vpalignr $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: 
[0xc4,0xe3,0x69,0x0b,0xd9,0x07] - vroundsd $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] - vroundsd $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] - vroundss $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] - vroundss $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundpd $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] - vroundpd $7, %xmm2, %xmm3 - -// CHECK: vroundpd $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] - vroundpd $7, (%eax), %xmm3 - -// CHECK: vroundps $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] - vroundps $7, %xmm2, %xmm3 - -// CHECK: vroundps $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] - vroundps $7, (%eax), %xmm3 - -// CHECK: vphminposuw %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] - vphminposuw %xmm2, %xmm3 - -// CHECK: vphminposuw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] - vphminposuw (%eax), %xmm2 - -// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] - vpackusdw %xmm2, %xmm3, %xmm1 - -// CHECK: vpackusdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] - vpackusdw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] - vpcmpeqq %xmm2, %xmm3, %xmm1 - -// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] - vpcmpeqq (%eax), %xmm2, %xmm3 - -// CHECK: vpminsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] - vpminsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] - vpminsb (%eax), %xmm2, %xmm3 - -// CHECK: vpminsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: 
[0xc4,0xe2,0x61,0x39,0xca] - vpminsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] - vpminsd (%eax), %xmm2, %xmm3 - -// CHECK: vpminud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] - vpminud %xmm2, %xmm3, %xmm1 - -// CHECK: vpminud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] - vpminud (%eax), %xmm2, %xmm3 - -// CHECK: vpminuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] - vpminuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpminuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] - vpminuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] - vpmaxsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] - vpmaxsb (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] - vpmaxsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] - vpmaxsd (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] - vpmaxud %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] - vpmaxud (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] - vpmaxuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] - vpmaxuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] - vpmuldq %xmm2, %xmm3, %xmm1 - -// CHECK: vpmuldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] - vpmuldq (%eax), %xmm2, %xmm3 - -// CHECK: vpmulld %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] - vpmulld %xmm2, %xmm5, %xmm1 - -// CHECK: vpmulld 
(%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] - vpmulld (%eax), %xmm5, %xmm3 - -// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] - vblendps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] - vblendps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] - vblendpd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] - vblendpd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] - vpblendw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] - vpblendw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] - vmpsadbw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] - vmpsadbw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] - vdpps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] - vdpps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] - vdppd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] - vdppd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] - vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] - vblendvpd %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: 
vblendvps %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] - vblendvps %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] - vblendvps %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] - vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] - vpblendvb %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpmovsxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] - vpmovsxbw %xmm2, %xmm5 - -// CHECK: vpmovsxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] - vpmovsxbw (%eax), %xmm2 - -// CHECK: vpmovsxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] - vpmovsxwd %xmm2, %xmm5 - -// CHECK: vpmovsxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] - vpmovsxwd (%eax), %xmm2 - -// CHECK: vpmovsxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] - vpmovsxdq %xmm2, %xmm5 - -// CHECK: vpmovsxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] - vpmovsxdq (%eax), %xmm2 - -// CHECK: vpmovzxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] - vpmovzxbw %xmm2, %xmm5 - -// CHECK: vpmovzxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] - vpmovzxbw (%eax), %xmm2 - -// CHECK: vpmovzxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] - vpmovzxwd %xmm2, %xmm5 - -// CHECK: vpmovzxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] - vpmovzxwd (%eax), %xmm2 - -// CHECK: vpmovzxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] - vpmovzxdq %xmm2, %xmm5 - -// CHECK: vpmovzxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] - vpmovzxdq (%eax), %xmm2 - -// CHECK: vpmovsxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] - vpmovsxbq %xmm2, %xmm5 - -// 
CHECK: vpmovsxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] - vpmovsxbq (%eax), %xmm2 - -// CHECK: vpmovzxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] - vpmovzxbq %xmm2, %xmm5 - -// CHECK: vpmovzxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] - vpmovzxbq (%eax), %xmm2 - -// CHECK: vpmovsxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] - vpmovsxbd %xmm2, %xmm5 - -// CHECK: vpmovsxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] - vpmovsxbd (%eax), %xmm2 - -// CHECK: vpmovsxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] - vpmovsxwq %xmm2, %xmm5 - -// CHECK: vpmovsxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] - vpmovsxwq (%eax), %xmm2 - -// CHECK: vpmovzxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] - vpmovzxbd %xmm2, %xmm5 - -// CHECK: vpmovzxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] - vpmovzxbd (%eax), %xmm2 - -// CHECK: vpmovzxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] - vpmovzxwq %xmm2, %xmm5 - -// CHECK: vpmovzxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] - vpmovzxwq (%eax), %xmm2 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpextrw $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] - vpextrw $7, %xmm2, (%eax) - -// CHECK: vpextrd $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] - vpextrd $7, %xmm2, %eax - -// CHECK: vpextrd $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] - vpextrd $7, %xmm2, (%eax) - -// CHECK: vpextrb $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] - vpextrb $7, %xmm2, %eax - -// CHECK: vpextrb $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] - vpextrb $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, (%eax) -// CHECK: encoding: 
[0xc4,0xe3,0x79,0x17,0x10,0x07] - vextractps $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] - vextractps $7, %xmm2, %eax - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] - vpinsrb $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] - vpinsrb $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] - vpinsrd $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] - vpinsrd $7, (%eax), %xmm2, %xmm5 - -// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] - vinsertps $7, %xmm2, %xmm5, %xmm1 - -// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] - vinsertps $7, (%eax), %xmm5, %xmm1 - -// CHECK: vptest %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] - vptest %xmm2, %xmm5 - -// CHECK: vptest (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] - vptest (%eax), %xmm2 - -// CHECK: vmovntdqa (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] - vmovntdqa (%eax), %xmm2 - -// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] - vpcmpgtq %xmm2, %xmm5, %xmm1 - -// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] - vpcmpgtq (%eax), %xmm5, %xmm3 - -// CHECK: vpcmpistrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] - vpcmpistrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpistrm $7, (%eax), %xmm5 -// CHECK: encoding: 
[0xc4,0xe3,0x79,0x62,0x28,0x07] - vpcmpistrm $7, (%eax), %xmm5 - -// CHECK: vpcmpestrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] - vpcmpestrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpestrm $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] - vpcmpestrm $7, (%eax), %xmm5 - -// CHECK: vpcmpistri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] - vpcmpistri $7, %xmm2, %xmm5 - -// CHECK: vpcmpistri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] - vpcmpistri $7, (%eax), %xmm5 - -// CHECK: vpcmpestri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] - vpcmpestri $7, %xmm2, %xmm5 - -// CHECK: vpcmpestri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] - vpcmpestri $7, (%eax), %xmm5 - -// CHECK: vaesimc %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] - vaesimc %xmm2, %xmm5 - -// CHECK: vaesimc (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] - vaesimc (%eax), %xmm2 - -// CHECK: vaesenc %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] - vaesenc %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenc (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] - vaesenc (%eax), %xmm5, %xmm3 - -// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] - vaesenclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] - vaesenclast (%eax), %xmm5, %xmm3 - -// CHECK: vaesdec %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] - vaesdec %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdec (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] - vaesdec (%eax), %xmm5, %xmm3 - -// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] - vaesdeclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] - vaesdeclast 
(%eax), %xmm5, %xmm3 - -// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] - vaeskeygenassist $7, %xmm2, %xmm5 - -// CHECK: vaeskeygenassist $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] - vaeskeygenassist $7, (%eax), %xmm5 - -// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] - vcmpeq_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] - vcmpngeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] - vcmpngtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] - vcmpfalseps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] - vcmpneq_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] - vcmpgeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] - vcmpgtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] - vcmptrueps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] - vcmpeq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] - vcmplt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] - vcmple_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] - vcmpunord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] - vcmpneq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 -// CHECK: 
encoding: [0xc5,0xe8,0xc2,0xd9,0x15] - vcmpnlt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] - vcmpnle_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] - vcmpord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] - vcmpeq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] - vcmpnge_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] - vcmpngt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] - vcmpfalse_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c] - vcmpneq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] - vcmpge_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] - vcmpgt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] - vcmptrue_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovaps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0x10] - vmovaps (%eax), %ymm2 - -// CHECK: vmovaps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] - vmovaps %ymm1, %ymm2 - -// CHECK: vmovaps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x29,0x08] - vmovaps %ymm1, (%eax) - -// CHECK: vmovapd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0x10] - vmovapd (%eax), %ymm2 - -// CHECK: vmovapd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] - vmovapd %ymm1, %ymm2 - -// CHECK: vmovapd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x29,0x08] - vmovapd %ymm1, (%eax) - -// CHECK: vmovups (%eax), %ymm2 -// 
CHECK: encoding: [0xc5,0xfc,0x10,0x10] - vmovups (%eax), %ymm2 - -// CHECK: vmovups %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] - vmovups %ymm1, %ymm2 - -// CHECK: vmovups %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x11,0x08] - vmovups %ymm1, (%eax) - -// CHECK: vmovupd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0x10] - vmovupd (%eax), %ymm2 - -// CHECK: vmovupd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] - vmovupd %ymm1, %ymm2 - -// CHECK: vmovupd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x11,0x08] - vmovupd %ymm1, (%eax) - -// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x15,0xe1] - vunpckhps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x15,0xe1] - vunpckhpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x14,0xe1] - vunpcklps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x14,0xe1] - vunpcklpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vmovntdq %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] - vmovntdq %ymm1, (%eax) - -// CHECK: vmovntpd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] - vmovntpd %ymm1, (%eax) - -// CHECK: vmovntps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] - vmovntps %ymm1, (%eax) - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: 
encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vmaxps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] - vmaxps %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] - vmaxpd %ymm2, %ymm4, %ymm6 - -// CHECK: vminps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] - vminps %ymm2, %ymm4, %ymm6 - -// CHECK: vminpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] - vminpd %ymm2, %ymm4, %ymm6 - -// CHECK: vsubps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] - vsubps %ymm2, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] - vsubpd %ymm2, %ymm4, %ymm6 - -// CHECK: vdivps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2] - vdivps %ymm2, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] - vdivpd %ymm2, %ymm4, %ymm6 - -// CHECK: vaddps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] - vaddps %ymm2, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] - vaddpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmulps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] - vmulps %ymm2, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] - vmulpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%eax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%eax), %ymm4, %ymm6 - -// CHECK: vminps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%eax), %ymm4, %ymm6 - -// CHECK: vminpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%eax), %ymm4, %ymm6 - -// CHECK: vsubps (%eax), %ymm4, %ymm6 
-// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%eax), %ymm4, %ymm6 - -// CHECK: vsubpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%eax), %ymm4, %ymm6 - -// CHECK: vdivps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%eax), %ymm4, %ymm6 - -// CHECK: vdivpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%eax), %ymm4, %ymm6 - -// CHECK: vaddps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%eax), %ymm4, %ymm6 - -// CHECK: vaddpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%eax), %ymm4, %ymm6 - -// CHECK: vmulps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%eax), %ymm4, %ymm6 - -// CHECK: vmulpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%eax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] - vsqrtpd %ymm1, %ymm2 - -// CHECK: vsqrtpd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0x10] - vsqrtpd (%eax), %ymm2 - -// CHECK: vsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] - vsqrtps %ymm1, %ymm2 - -// CHECK: vsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0x10] - vsqrtps (%eax), %ymm2 - -// CHECK: vrsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] - vrsqrtps %ymm1, %ymm2 - -// CHECK: vrsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0x10] - vrsqrtps (%eax), %ymm2 - -// CHECK: vrcpps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] - vrcpps %ymm1, %ymm2 - -// CHECK: vrcpps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0x10] - vrcpps (%eax), %ymm2 - -// CHECK: vandps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] - vandps %ymm2, %ymm4, %ymm6 - -// CHECK: vandpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] - vandpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: 
[0xc5,0xec,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] - vorps %ymm2, %ymm4, %ymm6 - -// CHECK: vorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] - vorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] - vxorps %ymm2, %ymm4, %ymm6 - -// CHECK: vxorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] - vxorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] - vandnps %ymm2, %ymm4, %ymm6 - -// CHECK: vandnpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] - vandnpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vcvtps2pd %xmm3, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] - vcvtps2pd %xmm3, %ymm2 - -// CHECK: vcvtps2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] - vcvtps2pd (%eax), %ymm2 - -// CHECK: vcvtdq2pd %xmm3, %ymm2 -// CHECK: 
encoding: [0xc5,0xfe,0xe6,0xd3] - vcvtdq2pd %xmm3, %ymm2 - -// CHECK: vcvtdq2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] - vcvtdq2pd (%eax), %ymm2 - -// CHECK: vcvtdq2ps %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] - vcvtdq2ps %ymm2, %ymm5 - -// CHECK: vcvtdq2ps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] - vcvtdq2ps (%eax), %ymm2 - -// CHECK: vcvtps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] - vcvtps2dq %ymm2, %ymm5 - -// CHECK: vcvtps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] - vcvtps2dq (%eax), %ymm5 - -// CHECK: vcvttps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] - vcvttps2dq %ymm2, %ymm5 - -// CHECK: vcvttps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] - vcvttps2dq (%eax), %ymm5 - -// CHECK: vcvttpd2dq %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dq %xmm1, %xmm5 - -// CHECK: vcvttpd2dq %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] - vcvttpd2dq %ymm2, %xmm5 - -// CHECK: vcvttpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dqx %xmm1, %xmm5 - -// CHECK: vcvttpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] - vcvttpd2dqx (%eax), %xmm1 - -// CHECK: vcvttpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] - vcvttpd2dqy %ymm2, %xmm1 - -// CHECK: vcvttpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] - vcvttpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2ps %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] - vcvtpd2ps %ymm2, %xmm5 - -// CHECK: vcvtpd2psx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] - vcvtpd2psx %xmm1, %xmm5 - -// CHECK: vcvtpd2psx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] - vcvtpd2psx (%eax), %xmm1 - -// CHECK: vcvtpd2psy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] - vcvtpd2psy %ymm2, %xmm1 - -// CHECK: vcvtpd2psy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] - vcvtpd2psy (%eax), %xmm1 - -// CHECK: vcvtpd2dq 
%ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xff,0xe6,0xea] - vcvtpd2dq %ymm2, %xmm5 - -// CHECK: vcvtpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0xca] - vcvtpd2dqy %ymm2, %xmm1 - -// CHECK: vcvtpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0x08] - vcvtpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] - vcvtpd2dqx %xmm1, %xmm5 - -// CHECK: vcvtpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] - vcvtpd2dqx (%eax), %xmm1 - -// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] - vcmpeqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] - vcmpleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] - vcmpltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] - vcmpneqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] - vcmpnleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] - vcmpnltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] - vcmpordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] - vcmpunordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), 
%ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] - vcmpeqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02] - vcmplepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] - vcmpltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] - vcmpneqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] - vcmpnlepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] - vcmpnltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] - vcmpordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] - vcmpunordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd 
-4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] - vcmpeq_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] - vcmpngeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] - vcmpngtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] - vcmpfalseps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] - vcmpneq_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] - vcmpgeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] - vcmpgtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] - vcmptrueps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $16, %ymm1, %ymm2, 
%ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] - vcmpeq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] - vcmplt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] - vcmple_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] - vcmpunord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] - vcmpneq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] - vcmpnlt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] - vcmpnle_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17] - vcmpord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] - vcmpeq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] - vcmpnge_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] - vcmpngt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] - vcmpfalse_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] - vcmpneq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] - vcmpge_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] - vcmpgt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] - vcmptrue_usps %ymm1, %ymm2, 
%ymm3 +// CHECK: movl (%ecx,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe1] + movl (%ecx,%eiz,8), %eax diff --git a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s new file mode 100644 index 0000000000000..db7efecfb51bc --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, 
%ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps 
(%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca] + vfmaddsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08] + vfmaddsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca] + vfmaddsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08] + vfmaddsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca] + vfmaddsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08] + vfmaddsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca] + vfmaddsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: 
vfmaddsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08] + vfmaddsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca] + vfmaddsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08] + vfmaddsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca] + vfmaddsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08] + vfmaddsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca] + vfmsubadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08] + vfmsubadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca] + vfmsubadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08] + vfmsubadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca] + vfmsubadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08] + vfmsubadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca] + vfmsubadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08] + vfmsubadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca] + vfmsubadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08] + vfmsubadd231pd (%eax), %xmm5, %xmm1 + 
+// CHECK: vfmsubadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca] + vfmsubadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08] + vfmsubadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca] + vfmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08] + vfmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca] + vfmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08] + vfmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca] + vfmsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08] + vfmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca] + vfmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08] + vfmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca] + vfmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08] + vfmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca] + vfmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08] + vfmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca] + vfnmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: 
[0xc4,0xe2,0xd1,0x9c,0x08] + vfnmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca] + vfnmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08] + vfnmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca] + vfnmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08] + vfnmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca] + vfnmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08] + vfnmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca] + vfnmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08] + vfnmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca] + vfnmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08] + vfnmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca] + vfnmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08] + vfnmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca] + vfnmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08] + vfnmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca] + vfnmsub213pd %xmm2, 
%xmm5, %xmm1 + +// CHECK: vfnmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08] + vfnmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca] + vfnmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08] + vfnmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca] + vfnmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08] + vfnmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca] + vfnmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08] + vfnmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// 
CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca] + vfmaddsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08] + vfmaddsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca] + vfmaddsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08] + vfmaddsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca] + vfmaddsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08] + vfmaddsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca] + vfmaddsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08] + vfmaddsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca] + vfmaddsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08] + vfmaddsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca] + vfmaddsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps (%eax), %ymm5, %ymm1 +// CHECK: 
encoding: [0xc4,0xe2,0x55,0xb6,0x08] + vfmaddsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca] + vfmsubadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08] + vfmsubadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca] + vfmsubadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08] + vfmsubadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca] + vfmsubadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08] + vfmsubadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca] + vfmsubadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08] + vfmsubadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca] + vfmsubadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08] + vfmsubadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca] + vfmsubadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08] + vfmsubadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca] + vfmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08] + vfmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: 
encoding: [0xc4,0xe2,0x55,0x9a,0xca] + vfmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08] + vfmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca] + vfmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08] + vfmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca] + vfmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08] + vfmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca] + vfmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08] + vfmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca] + vfmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08] + vfmsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca] + vfnmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08] + vfnmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca] + vfnmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08] + vfnmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca] + vfnmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08] + vfnmadd213pd (%eax), %ymm5, 
%ymm1 + +// CHECK: vfnmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca] + vfnmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08] + vfnmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca] + vfnmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08] + vfnmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca] + vfnmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08] + vfnmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca] + vfnmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08] + vfnmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca] + vfnmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08] + vfnmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca] + vfnmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08] + vfnmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca] + vfnmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08] + vfnmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca] + vfnmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231pd (%eax), %ymm5, 
%ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08] + vfnmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca] + vfnmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08] + vfnmsub231ps (%eax), %ymm5, %ymm1 + diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s index e97e4940a42c5..e3aa1887ef81e 100644 --- a/test/MC/AsmParser/X86/x86_32-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_32-new-encoder.s @@ -415,3 +415,11 @@ retl // CHECK: encoding: [0x61] popal +// CHECK: jmpl *8(%eax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%eax) + +// PR7465 +// CHECK: lcalll $2, $4660 +// CHECK: encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00] +lcalll $0x2, $0x1234 diff --git a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s new file mode 100644 index 0000000000000..67e82c6cd0d29 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulhqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulhqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $1, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01] + vpclmulhqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $1, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01] + vpclmulhqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $16, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10] + vpclmullqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $16, (%rax), %xmm10, %xmm13 +// CHECK: encoding: 
[0xc4,0x63,0x29,0x44,0x28,0x10] + vpclmullqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $0, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00] + vpclmullqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $0, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00] + vpclmullqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulqdq $17, %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulqdq $17, (%rax), %xmm10, %xmm13 + diff --git a/test/MC/AsmParser/X86/x86_64-avx-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-encoding.s new file mode 100644 index 0000000000000..7a96bb5a2b485 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-encoding.s @@ -0,0 +1,3318 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] +vaddss %xmm8, %xmm9, %xmm10 + +// CHECK: vmulss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] +vmulss %xmm8, %xmm9, %xmm10 + +// CHECK: vsubss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] +vsubss %xmm8, %xmm9, %xmm10 + +// CHECK: vdivss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] +vdivss %xmm8, %xmm9, %xmm10 + +// CHECK: vaddsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] +vaddsd %xmm8, %xmm9, %xmm10 + +// CHECK: vmulsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] +vmulsd %xmm8, %xmm9, %xmm10 + +// CHECK: vsubsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] +vsubsd %xmm8, %xmm9, %xmm10 + +// CHECK: vdivsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] +vdivsd %xmm8, %xmm9, %xmm10 + +// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: 
[0xc5,0x2a,0x58,0x5c,0xd9,0xfc] +vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] +vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] +vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] +vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] +vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] +vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] +vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] +vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] +vaddps %xmm10, %xmm11, %xmm15 + +// CHECK: vsubps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] +vsubps %xmm10, %xmm11, %xmm15 + +// CHECK: vmulps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] +vmulps %xmm10, %xmm11, %xmm15 + +// CHECK: vdivps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] +vdivps %xmm10, %xmm11, %xmm15 + +// CHECK: vaddpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] +vaddpd %xmm10, %xmm11, %xmm15 + +// CHECK: vsubpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] +vsubpd %xmm10, %xmm11, %xmm15 + +// CHECK: vmulpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] +vmulpd %xmm10, %xmm11, %xmm15 + +// CHECK: vdivpd %xmm10, %xmm11, %xmm15 +// CHECK: 
encoding: [0xc4,0x41,0x21,0x5e,0xfa] +vdivpd %xmm10, %xmm11, %xmm15 + +// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] +vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] +vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] +vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] +vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc] +vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] +vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] +vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] +vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmaxss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] + vmaxss %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] + vmaxsd %xmm10, %xmm14, %xmm12 + +// CHECK: vminss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] + vminss %xmm10, %xmm14, %xmm12 + +// CHECK: vminsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] + vminsd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] + vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] + vmaxsd -4(%rbx,%rcx,8), 
%xmm12, %xmm10 + +// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] + vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] + vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] + vmaxps %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] + vmaxpd %xmm10, %xmm14, %xmm12 + +// CHECK: vminps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] + vminps %xmm10, %xmm14, %xmm12 + +// CHECK: vminpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] + vminpd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] + vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] + vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] + vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] + vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] + vandps %xmm10, %xmm14, %xmm12 + +// CHECK: vandpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] + vandpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: 
[0xc4,0x41,0x08,0x56,0xe2] + vorps %xmm10, %xmm14, %xmm12 + +// CHECK: vorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] + vorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] + vxorps %xmm10, %xmm14, %xmm12 + +// CHECK: vxorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] + vxorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] + vandnps %xmm10, %xmm14, %xmm12 + +// CHECK: vandnpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] + vandnpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] + vmovss -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovss %xmm14, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] + vmovss %xmm14, %xmm10, %xmm15 + +// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] + vmovsd -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovsd %xmm14, %xmm10, %xmm15 +// CHECK: encoding: 
[0xc4,0x41,0x2b,0x10,0xfe] + vmovsd %xmm14, %xmm10, %xmm15 + +// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] + vunpckhps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] + vunpckhpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] + vunpcklps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] + vunpcklpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] + vcmpps $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] + vcmpps $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] + vcmpps $7, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] + vcmppd $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] + vcmppd $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] + vcmppd $7, %xmm10, %xmm12, %xmm15 + +// 
CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] + vshufps $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] + vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] + vshufpd $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] + vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] + vcmpeqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] + vcmpleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] + vcmpltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] + vcmpneqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] + vcmpnleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] + vcmpnltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] + vcmpordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] + vcmpunordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: 
vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] + vcmpeqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] + vcmplepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] + vcmpltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] + vcmpneqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] + vcmpnlepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] + vcmpnltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] + vcmpordpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] + vcmpunordpd %xmm11, 
%xmm12, %xmm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] + vcmpeqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] + vcmpless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] + vcmpltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] + vcmpneqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] + vcmpnless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 +// 
CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] + vcmpnltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] + vcmpordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] + vcmpunordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] + vcmpeqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] + vcmplesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] + vcmpltsd 
%xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] + vcmpneqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] + vcmpnlesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] + vcmpnltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] + vcmpordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] + vcmpunordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vucomiss %xmm11, %xmm12 +// 
CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] + vucomiss %xmm11, %xmm12 + +// CHECK: vucomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2e,0x20] + vucomiss (%rax), %xmm12 + +// CHECK: vcomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] + vcomiss %xmm11, %xmm12 + +// CHECK: vcomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2f,0x20] + vcomiss (%rax), %xmm12 + +// CHECK: vucomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] + vucomisd %xmm11, %xmm12 + +// CHECK: vucomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2e,0x20] + vucomisd (%rax), %xmm12 + +// CHECK: vcomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] + vcomisd %xmm11, %xmm12 + +// CHECK: vcomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2f,0x20] + vcomisd (%rax), %xmm12 + +// CHECK: vcvttss2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%rcx), %eax + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvttsd2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%rcx), %eax + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vmovaps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x28,0x20] + vmovaps (%rax), %xmm12 + +// CHECK: vmovaps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] + vmovaps %xmm11, %xmm12 + +// CHECK: vmovaps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x29,0x18] + vmovaps %xmm11, (%rax) + +// CHECK: vmovapd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x28,0x20] + vmovapd (%rax), %xmm12 + +// CHECK: vmovapd %xmm11, %xmm12 +// CHECK: 
encoding: [0xc4,0x41,0x79,0x28,0xe3] + vmovapd %xmm11, %xmm12 + +// CHECK: vmovapd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x29,0x18] + vmovapd %xmm11, (%rax) + +// CHECK: vmovups (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x10,0x20] + vmovups (%rax), %xmm12 + +// CHECK: vmovups %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] + vmovups %xmm11, %xmm12 + +// CHECK: vmovups %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x11,0x18] + vmovups %xmm11, (%rax) + +// CHECK: vmovupd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x10,0x20] + vmovupd (%rax), %xmm12 + +// CHECK: vmovupd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] + vmovupd %xmm11, %xmm12 + +// CHECK: vmovupd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x11,0x18] + vmovupd %xmm11, (%rax) + +// CHECK: vmovlps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x13,0x18] + vmovlps %xmm11, (%rax) + +// CHECK: vmovlps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x12,0x28] + vmovlps (%rax), %xmm12, %xmm13 + +// CHECK: vmovlpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x13,0x18] + vmovlpd %xmm11, (%rax) + +// CHECK: vmovlpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x12,0x28] + vmovlpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovhps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x17,0x18] + vmovhps %xmm11, (%rax) + +// CHECK: vmovhps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x16,0x28] + vmovhps (%rax), %xmm12, %xmm13 + +// CHECK: vmovhpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x17,0x18] + vmovhpd %xmm11, (%rax) + +// CHECK: vmovhpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x16,0x28] + vmovhpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] + vmovlhps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] + vmovhlps %xmm11, %xmm12, %xmm13 + +// CHECK: vcvtss2sil %xmm11, %eax +// CHECK: 
encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] + vcvtss2si %xmm11, %eax + +// CHECK: vcvtss2sil (%rax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%rax), %ebx + +// CHECK: vcvtdq2ps %xmm10, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] + vcvtdq2ps %xmm10, %xmm12 + +// CHECK: vcvtdq2ps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x5b,0x20] + vcvtdq2ps (%rax), %xmm12 + +// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] + vcvtsd2ss %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x13,0x5a,0x10] + vcvtsd2ss (%rax), %xmm13, %xmm10 + +// CHECK: vcvtps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] + vcvtps2dq %xmm12, %xmm11 + +// CHECK: vcvtps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5b,0x18] + vcvtps2dq (%rax), %xmm11 + +// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] + vcvtss2sd %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x12,0x5a,0x10] + vcvtss2sd (%rax), %xmm13, %xmm10 + +// CHECK: vcvtdq2ps %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] + vcvtdq2ps %xmm13, %xmm10 + +// CHECK: vcvtdq2ps (%ecx), %xmm13 +// CHECK: encoding: [0xc5,0x78,0x5b,0x29] + vcvtdq2ps (%ecx), %xmm13 + +// CHECK: vcvttps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] + vcvttps2dq %xmm12, %xmm11 + +// CHECK: vcvttps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] + vcvttps2dq (%rax), %xmm11 + +// CHECK: vcvtps2pd %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] + vcvtps2pd %xmm12, %xmm11 + +// CHECK: vcvtps2pd (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x78,0x5a,0x18] + vcvtps2pd (%rax), %xmm11 + +// CHECK: vcvtpd2ps %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] + vcvtpd2ps %xmm12, %xmm11 + +// CHECK: vsqrtpd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] + vsqrtpd %xmm11, 
%xmm12 + +// CHECK: vsqrtpd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x51,0x20] + vsqrtpd (%rax), %xmm12 + +// CHECK: vsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] + vsqrtps %xmm11, %xmm12 + +// CHECK: vsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x51,0x20] + vsqrtps (%rax), %xmm12 + +// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] + vsqrtsd %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x51,0x10] + vsqrtsd (%rax), %xmm12, %xmm10 + +// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] + vsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x51,0x10] + vsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] + vrsqrtps %xmm11, %xmm12 + +// CHECK: vrsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x52,0x20] + vrsqrtps (%rax), %xmm12 + +// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] + vrsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x52,0x10] + vrsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrcpps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] + vrcpps %xmm11, %xmm12 + +// CHECK: vrcpps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x53,0x20] + vrcpps (%rax), %xmm12 + +// CHECK: vrcpss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] + vrcpss %xmm11, %xmm12, %xmm10 + +// CHECK: vrcpss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x53,0x10] + vrcpss (%rax), %xmm12, %xmm10 + +// CHECK: vmovntdq %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0xe7,0x18] + vmovntdq %xmm11, (%rax) + +// CHECK: vmovntpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x2b,0x18] + vmovntpd %xmm11, (%rax) + +// CHECK: vmovntps %xmm11, (%rax) +// CHECK: encoding: 
[0xc5,0x78,0x2b,0x18] + vmovntps %xmm11, (%rax) + +// CHECK: vldmxcsr -4(%rip) +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] + vldmxcsr -4(%rip) + +// CHECK: vstmxcsr -4(%rsp) +// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] + vstmxcsr -4(%rsp) + +// CHECK: vpsubb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] + vpsubb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf8,0x28] + vpsubb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] + vpsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf9,0x28] + vpsubw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] + vpsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfa,0x28] + vpsubd (%rax), %xmm12, %xmm13 + +// CHECK: vpsubq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] + vpsubq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfb,0x28] + vpsubq (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] + vpsubsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe8,0x28] + vpsubsb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] + vpsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe9,0x28] + vpsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] + vpsubusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd8,0x28] + vpsubusb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusw %xmm11, 
%xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] + vpsubusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd9,0x28] + vpsubusw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] + vpaddb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfc,0x28] + vpaddb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb] + vpaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfd,0x28] + vpaddw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] + vpaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfe,0x28] + vpaddd (%rax), %xmm12, %xmm13 + +// CHECK: vpaddq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] + vpaddq %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd4,0x28] + vpaddq (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] + vpaddsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xec,0x28] + vpaddsb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] + vpaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xed,0x28] + vpaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] + vpaddusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdc,0x28] + vpaddusb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x19,0xdd,0xeb] + vpaddusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdd,0x28] + vpaddusw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] + vpmulhuw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe4,0x28] + vpmulhuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] + vpmulhw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe5,0x28] + vpmulhw (%rax), %xmm12, %xmm13 + +// CHECK: vpmullw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] + vpmullw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmullw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd5,0x28] + vpmullw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] + vpmuludq %xmm11, %xmm12, %xmm13 + +// CHECK: vpmuludq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf4,0x28] + vpmuludq (%rax), %xmm12, %xmm13 + +// CHECK: vpavgb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] + vpavgb %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe0,0x28] + vpavgb (%rax), %xmm12, %xmm13 + +// CHECK: vpavgw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] + vpavgw %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe3,0x28] + vpavgw (%rax), %xmm12, %xmm13 + +// CHECK: vpminsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] + vpminsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpminsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xea,0x28] + vpminsw (%rax), %xmm12, %xmm13 + +// CHECK: vpminub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] + vpminub 
%xmm11, %xmm12, %xmm13 + +// CHECK: vpminub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xda,0x28] + vpminub (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] + vpmaxsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xee,0x28] + vpmaxsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] + vpmaxub %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xde,0x28] + vpmaxub (%rax), %xmm12, %xmm13 + +// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] + vpsadbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsadbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf6,0x28] + vpsadbw (%rax), %xmm12, %xmm13 + +// CHECK: vpsllw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] + vpsllw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf1,0x28] + vpsllw (%rax), %xmm12, %xmm13 + +// CHECK: vpslld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] + vpslld %xmm11, %xmm12, %xmm13 + +// CHECK: vpslld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf2,0x28] + vpslld (%rax), %xmm12, %xmm13 + +// CHECK: vpsllq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] + vpsllq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf3,0x28] + vpsllq (%rax), %xmm12, %xmm13 + +// CHECK: vpsraw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] + vpsraw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsraw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe1,0x28] + vpsraw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrad %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] + vpsrad %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrad (%rax), 
%xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe2,0x28] + vpsrad (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] + vpsrlw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd1,0x28] + vpsrlw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] + vpsrld %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd2,0x28] + vpsrld (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] + vpsrlq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd3,0x28] + vpsrlq (%rax), %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpslldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] + vpslldq $10, %xmm12, %xmm13 + +// CHECK: vpsllq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] + vpsllq $10, %xmm12, %xmm13 + +// CHECK: vpsllw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] + vpsllw $10, %xmm12, %xmm13 + +// CHECK: vpsrad $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] + vpsrad $10, %xmm12, %xmm13 + +// CHECK: vpsraw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] + vpsraw $10, %xmm12, %xmm13 + +// CHECK: vpsrld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] + vpsrld $10, %xmm12, %xmm13 + +// CHECK: vpsrldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] + vpsrldq $10, %xmm12, %xmm13 + +// CHECK: vpsrlq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] + vpsrlq $10, %xmm12, %xmm13 + +// CHECK: vpsrlw $10, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0xc1,0x11,0x71,0xd4,0x0a] + vpsrlw $10, %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpand %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] + vpand %xmm11, %xmm12, %xmm13 + +// CHECK: vpand (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdb,0x28] + vpand (%rax), %xmm12, %xmm13 + +// CHECK: vpor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] + vpor %xmm11, %xmm12, %xmm13 + +// CHECK: vpor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xeb,0x28] + vpor (%rax), %xmm12, %xmm13 + +// CHECK: vpxor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] + vpxor %xmm11, %xmm12, %xmm13 + +// CHECK: vpxor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xef,0x28] + vpxor (%rax), %xmm12, %xmm13 + +// CHECK: vpandn %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] + vpandn %xmm11, %xmm12, %xmm13 + +// CHECK: vpandn (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdf,0x28] + vpandn (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] + vpcmpeqb %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x74,0x28] + vpcmpeqb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] + vpcmpeqw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x75,0x28] + vpcmpeqw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] + vpcmpeqd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x76,0x28] + vpcmpeqd (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] + vpcmpgtb %xmm11, %xmm12, 
%xmm13 + +// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x64,0x28] + vpcmpgtb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] + vpcmpgtw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x65,0x28] + vpcmpgtw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] + vpcmpgtd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x66,0x28] + vpcmpgtd (%rax), %xmm12, %xmm13 + +// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] + vpacksswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpacksswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x63,0x28] + vpacksswb (%rax), %xmm12, %xmm13 + +// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] + vpackssdw %xmm11, %xmm12, %xmm13 + +// CHECK: vpackssdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6b,0x28] + vpackssdw (%rax), %xmm12, %xmm13 + +// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] + vpackuswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpackuswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x67,0x28] + vpackuswb (%rax), %xmm12, %xmm13 + +// CHECK: vpshufd $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] + vpshufd $4, %xmm12, %xmm13 + +// CHECK: vpshufd $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] + vpshufd $4, (%rax), %xmm13 + +// CHECK: vpshufhw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] + vpshufhw $4, %xmm12, %xmm13 + +// CHECK: vpshufhw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] + vpshufhw $4, (%rax), %xmm13 + +// CHECK: vpshuflw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] + vpshuflw $4, %xmm12, %xmm13 + +// 
CHECK: vpshuflw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] + vpshuflw $4, (%rax), %xmm13 + +// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] + vpunpcklbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x60,0x28] + vpunpcklbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] + vpunpcklwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x61,0x28] + vpunpcklwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] + vpunpckldq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x62,0x28] + vpunpckldq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] + vpunpcklqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6c,0x28] + vpunpcklqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] + vpunpckhbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x68,0x28] + vpunpckhbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] + vpunpckhwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x69,0x28] + vpunpckhwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] + vpunpckhdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6a,0x28] + vpunpckhdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x19,0x6d,0xeb] + vpunpckhqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6d,0x28] + vpunpckhqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm12, %xmm13 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm13 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpmovmskb %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] + vpmovmskb %xmm12, %eax + +// CHECK: vmaskmovdqu %xmm14, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] + vmaskmovdqu %xmm14, %xmm15 + +// CHECK: vmovd %eax, %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] + vmovd %eax, %xmm14 + +// CHECK: vmovd (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0x30] + vmovd (%rax), %xmm14 + +// CHECK: vmovd %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0x7e,0x30] + vmovd %xmm14, (%rax) + +// CHECK: vmovd %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovd %rax, %xmm14 + +// CHECK: vmovq %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0xd6,0x30] + vmovq %xmm14, (%rax) + +// CHECK: vmovq %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] + vmovq %xmm14, %xmm12 + +// CHECK: vmovq (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] + vmovq (%rax), %xmm14 + +// CHECK: vmovq %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovq %rax, %xmm14 + +// CHECK: vmovq %xmm14, %rax +// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] + vmovq %xmm14, %rax + +// CHECK: vcvtpd2dq %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] + vcvtpd2dq %xmm11, %xmm12 + +// CHECK: vcvtdq2pd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] + vcvtdq2pd %xmm11, %xmm12 + +// CHECK: vcvtdq2pd (%rax), %xmm12 +// CHECK: encoding: 
[0xc5,0x7a,0xe6,0x20] + vcvtdq2pd (%rax), %xmm12 + +// CHECK: vmovshdup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] + vmovshdup %xmm11, %xmm12 + +// CHECK: vmovshdup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x16,0x20] + vmovshdup (%rax), %xmm12 + +// CHECK: vmovsldup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] + vmovsldup %xmm11, %xmm12 + +// CHECK: vmovsldup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x12,0x20] + vmovsldup (%rax), %xmm12 + +// CHECK: vmovddup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] + vmovddup %xmm11, %xmm12 + +// CHECK: vmovddup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7b,0x12,0x20] + vmovddup (%rax), %xmm12 + +// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] + vaddsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubps (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0xd0,0x20] + vaddsubps (%rax), %xmm11, %xmm12 + +// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] + vaddsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x21,0xd0,0x20] + vaddsubpd (%rax), %xmm11, %xmm12 + +// CHECK: vhaddps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] + vhaddps %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] + vhaddps (%rax), %xmm12, %xmm13 + +// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] + vhaddpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7c,0x28] + vhaddpd (%rax), %xmm12, %xmm13 + +// CHECK: vhsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] + vhsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] + vhsubps (%rax), %xmm12, %xmm13 + +// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 +// 
CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] + vhsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7d,0x28] + vhsubpd (%rax), %xmm12, %xmm13 + +// CHECK: vpabsb %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] + vpabsb %xmm11, %xmm12 + +// CHECK: vpabsb (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] + vpabsb (%rax), %xmm12 + +// CHECK: vpabsw %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] + vpabsw %xmm11, %xmm12 + +// CHECK: vpabsw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20] + vpabsw (%rax), %xmm12 + +// CHECK: vpabsd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] + vpabsd %xmm11, %xmm12 + +// CHECK: vpabsd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] + vpabsd (%rax), %xmm12 + +// CHECK: vphaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] + vphaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] + vphaddw (%rax), %xmm12, %xmm13 + +// CHECK: vphaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] + vphaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] + vphaddd (%rax), %xmm12, %xmm13 + +// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] + vphaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] + vphaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] + vphsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] + vphsubw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] + vphsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubd (%rax), 
%xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] + vphsubd (%rax), %xmm12, %xmm13 + +// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] + vphsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] + vphsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] + vpmaddubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] + vpmaddubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpshufb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] + vpshufb %xmm11, %xmm12, %xmm13 + +// CHECK: vpshufb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] + vpshufb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] + vpsignb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] + vpsignb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] + vpsignw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] + vpsignw (%rax), %xmm12, %xmm13 + +// CHECK: vpsignd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] + vpsignd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] + vpsignd (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] + vpmulhrsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] + vpmulhrsw (%rax), %xmm12, %xmm13 + +// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] + vpalignr $7, %xmm11, 
%xmm12, %xmm13 + +// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] + vpalignr $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] + vroundsd $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] + vroundsd $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] + vroundss $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundss $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] + vroundss $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundpd $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] + vroundpd $7, %xmm12, %xmm13 + +// CHECK: vroundpd $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] + vroundpd $7, (%rax), %xmm13 + +// CHECK: vroundps $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] + vroundps $7, %xmm12, %xmm13 + +// CHECK: vroundps $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] + vroundps $7, (%rax), %xmm13 + +// CHECK: vphminposuw %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] + vphminposuw %xmm12, %xmm13 + +// CHECK: vphminposuw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] + vphminposuw (%rax), %xmm12 + +// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] + vpackusdw %xmm12, %xmm13, %xmm11 + +// CHECK: vpackusdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] + vpackusdw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] + vpcmpeqq %xmm12, %xmm13, %xmm11 + +// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] + vpcmpeqq (%rax), %xmm12, %xmm13 + +// CHECK: vpminsb %xmm12, %xmm13, 
%xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] + vpminsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] + vpminsb (%rax), %xmm12, %xmm13 + +// CHECK: vpminsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] + vpminsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] + vpminsd (%rax), %xmm12, %xmm13 + +// CHECK: vpminud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] + vpminud %xmm12, %xmm13, %xmm11 + +// CHECK: vpminud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] + vpminud (%rax), %xmm12, %xmm13 + +// CHECK: vpminuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] + vpminuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpminuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] + vpminuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] + vpmaxsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] + vpmaxsb (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] + vpmaxsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] + vpmaxsd (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] + vpmaxud %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] + vpmaxud (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] + vpmaxuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] + vpmaxuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuldq %xmm12, %xmm13, 
%xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] + vpmuldq %xmm12, %xmm13, %xmm11 + +// CHECK: vpmuldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] + vpmuldq (%rax), %xmm12, %xmm13 + +// CHECK: vpmulld %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] + vpmulld %xmm12, %xmm5, %xmm11 + +// CHECK: vpmulld (%rax), %xmm5, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] + vpmulld (%rax), %xmm5, %xmm13 + +// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] + vblendps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] + vblendps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] + vblendpd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] + vblendpd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] + vpblendw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] + vpblendw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] + vmpsadbw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] + vmpsadbw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] + vdpps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] + vdpps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] + vdppd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdppd $3, (%rax), %xmm5, 
%xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] + vdppd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] + vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] + vblendvpd %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] + vblendvps %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] + vblendvps %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] + vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] + vpblendvb %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpmovsxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] + vpmovsxbw %xmm12, %xmm10 + +// CHECK: vpmovsxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] + vpmovsxbw (%rax), %xmm12 + +// CHECK: vpmovsxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] + vpmovsxwd %xmm12, %xmm10 + +// CHECK: vpmovsxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] + vpmovsxwd (%rax), %xmm12 + +// CHECK: vpmovsxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] + vpmovsxdq %xmm12, %xmm10 + +// CHECK: vpmovsxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] + vpmovsxdq (%rax), %xmm12 + +// CHECK: vpmovzxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] + vpmovzxbw %xmm12, %xmm10 + +// CHECK: vpmovzxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] + vpmovzxbw (%rax), %xmm12 + +// CHECK: vpmovzxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] + vpmovzxwd %xmm12, 
%xmm10 + +// CHECK: vpmovzxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] + vpmovzxwd (%rax), %xmm12 + +// CHECK: vpmovzxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] + vpmovzxdq %xmm12, %xmm10 + +// CHECK: vpmovzxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] + vpmovzxdq (%rax), %xmm12 + +// CHECK: vpmovsxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] + vpmovsxbq %xmm12, %xmm10 + +// CHECK: vpmovsxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] + vpmovsxbq (%rax), %xmm12 + +// CHECK: vpmovzxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] + vpmovzxbq %xmm12, %xmm10 + +// CHECK: vpmovzxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] + vpmovzxbq (%rax), %xmm12 + +// CHECK: vpmovsxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] + vpmovsxbd %xmm12, %xmm10 + +// CHECK: vpmovsxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] + vpmovsxbd (%rax), %xmm12 + +// CHECK: vpmovsxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] + vpmovsxwq %xmm12, %xmm10 + +// CHECK: vpmovsxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] + vpmovsxwq (%rax), %xmm12 + +// CHECK: vpmovzxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] + vpmovzxbd %xmm12, %xmm10 + +// CHECK: vpmovzxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] + vpmovzxbd (%rax), %xmm12 + +// CHECK: vpmovzxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] + vpmovzxwq %xmm12, %xmm10 + +// CHECK: vpmovzxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] + vpmovzxwq (%rax), %xmm12 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpextrw $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] + vpextrw $7, %xmm12, (%rax) + +// CHECK: vpextrd $7, %xmm12, %eax +// CHECK: 
encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] + vpextrd $7, %xmm12, %eax + +// CHECK: vpextrd $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] + vpextrd $7, %xmm12, (%rax) + +// CHECK: vpextrb $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] + vpextrb $7, %xmm12, %eax + +// CHECK: vpextrb $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] + vpextrb $7, %xmm12, (%rax) + +// CHECK: vpextrq $7, %xmm12, %rcx +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] + vpextrq $7, %xmm12, %rcx + +// CHECK: vpextrq $7, %xmm12, (%rcx) +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] + vpextrq $7, %xmm12, (%rcx) + +// CHECK: vextractps $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] + vextractps $7, %xmm12, (%rax) + +// CHECK: vextractps $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] + vextractps $7, %xmm12, %eax + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] + vpinsrw $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] + vpinsrb $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] + vpinsrb $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] + vpinsrd $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] + vpinsrd $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] + vpinsrq $7, %rax, %xmm12, %xmm10 + +// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] + vpinsrq $7, (%rax), %xmm12, 
%xmm10 + +// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] + vinsertps $7, %xmm12, %xmm10, %xmm11 + +// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] + vinsertps $7, (%rax), %xmm10, %xmm11 + +// CHECK: vptest %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] + vptest %xmm12, %xmm10 + +// CHECK: vptest (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] + vptest (%rax), %xmm12 + +// CHECK: vmovntdqa (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] + vmovntdqa (%rax), %xmm12 + +// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] + vpcmpgtq %xmm12, %xmm10, %xmm11 + +// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] + vpcmpgtq (%rax), %xmm10, %xmm13 + +// CHECK: vpcmpistrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] + vpcmpistrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpistrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] + vpcmpistrm $7, (%rax), %xmm10 + +// CHECK: vpcmpestrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] + vpcmpestrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpestrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] + vpcmpestrm $7, (%rax), %xmm10 + +// CHECK: vpcmpistri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] + vpcmpistri $7, %xmm12, %xmm10 + +// CHECK: vpcmpistri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] + vpcmpistri $7, (%rax), %xmm10 + +// CHECK: vpcmpestri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] + vpcmpestri $7, %xmm12, %xmm10 + +// CHECK: vpcmpestri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] + vpcmpestri $7, (%rax), %xmm10 + +// CHECK: vaesimc %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] + vaesimc %xmm12, 
%xmm10 + +// CHECK: vaesimc (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] + vaesimc (%rax), %xmm12 + +// CHECK: vaesenc %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] + vaesenc %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenc (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] + vaesenc (%rax), %xmm10, %xmm13 + +// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] + vaesenclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28] + vaesenclast (%rax), %xmm10, %xmm13 + +// CHECK: vaesdec %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] + vaesdec %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdec (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] + vaesdec (%rax), %xmm10, %xmm13 + +// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] + vaesdeclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] + vaesdeclast (%rax), %xmm10, %xmm13 + +// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] + vaeskeygenassist $7, %xmm12, %xmm10 + +// CHECK: vaeskeygenassist $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] + vaeskeygenassist $7, (%rax), %xmm10 + +// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] + vcmpeq_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] + vcmpngeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] + vcmpngtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] + vcmpfalseps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $12, 
%xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] + vcmpneq_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] + vcmpgeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] + vcmpgtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] + vcmptrueps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] + vcmpeq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] + vcmplt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] + vcmple_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] + vcmpunord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] + vcmpneq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] + vcmpnlt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] + vcmpnle_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] + vcmpord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] + vcmpeq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] + vcmpnge_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x18,0xc2,0xeb,0x1a] + vcmpngt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] + vcmpfalse_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] + vcmpneq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] + vcmpge_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e] + vcmpgt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] + vcmptrue_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovaps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x28,0x20] + vmovaps (%rax), %ymm12 + +// CHECK: vmovaps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] + vmovaps %ymm11, %ymm12 + +// CHECK: vmovaps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x29,0x18] + vmovaps %ymm11, (%rax) + +// CHECK: vmovapd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x28,0x20] + vmovapd (%rax), %ymm12 + +// CHECK: vmovapd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] + vmovapd %ymm11, %ymm12 + +// CHECK: vmovapd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x29,0x18] + vmovapd %ymm11, (%rax) + +// CHECK: vmovups (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x10,0x20] + vmovups (%rax), %ymm12 + +// CHECK: vmovups %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] + vmovups %ymm11, %ymm12 + +// CHECK: vmovups %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x11,0x18] + vmovups %ymm11, (%rax) + +// CHECK: vmovupd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x10,0x20] + vmovupd (%rax), %ymm12 + +// CHECK: vmovupd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] + vmovupd %ymm11, %ymm12 + +// CHECK: vmovupd %ymm11, (%rax) +// CHECK: encoding: 
[0xc5,0x7d,0x11,0x18] + vmovupd %ymm11, (%rax) + +// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] + vunpckhps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] + vunpckhpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] + vunpcklps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] + vunpcklpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vmovntdq %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] + vmovntdq %ymm11, (%rax) + +// CHECK: vmovntpd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] + vmovntpd %ymm11, (%rax) + +// CHECK: vmovntps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] + vmovntps %ymm11, (%rax) + +// CHECK: vmovmskps %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] + vmovmskps %xmm12, %eax + +// CHECK: vmovmskpd %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] + vmovmskpd %xmm12, %eax + +// CHECK: vmaxps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] + vmaxps %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] + vmaxpd %ymm12, %ymm4, %ymm6 + +// CHECK: vminps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: 
[0xc4,0xc1,0x5c,0x5d,0xf4] + vminps %ymm12, %ymm4, %ymm6 + +// CHECK: vminpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] + vminpd %ymm12, %ymm4, %ymm6 + +// CHECK: vsubps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] + vsubps %ymm12, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] + vsubpd %ymm12, %ymm4, %ymm6 + +// CHECK: vdivps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] + vdivps %ymm12, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] + vdivpd %ymm12, %ymm4, %ymm6 + +// CHECK: vaddps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] + vaddps %ymm12, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] + vaddpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmulps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] + vmulps %ymm12, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] + vmulpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%rax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%rax), %ymm4, %ymm6 + +// CHECK: vminps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%rax), %ymm4, %ymm6 + +// CHECK: vminpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%rax), %ymm4, %ymm6 + +// CHECK: vsubps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%rax), %ymm4, %ymm6 + +// CHECK: vsubpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%rax), %ymm4, %ymm6 + +// CHECK: vdivps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%rax), %ymm4, %ymm6 + +// CHECK: vdivpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: 
[0xc5,0xdd,0x5e,0x30] + vdivpd (%rax), %ymm4, %ymm6 + +// CHECK: vaddps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%rax), %ymm4, %ymm6 + +// CHECK: vaddpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%rax), %ymm4, %ymm6 + +// CHECK: vmulps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%rax), %ymm4, %ymm6 + +// CHECK: vmulpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%rax), %ymm4, %ymm6 + +// CHECK: vsqrtpd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] + vsqrtpd %ymm11, %ymm12 + +// CHECK: vsqrtpd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x51,0x20] + vsqrtpd (%rax), %ymm12 + +// CHECK: vsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] + vsqrtps %ymm11, %ymm12 + +// CHECK: vsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x51,0x20] + vsqrtps (%rax), %ymm12 + +// CHECK: vrsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] + vrsqrtps %ymm11, %ymm12 + +// CHECK: vrsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x52,0x20] + vrsqrtps (%rax), %ymm12 + +// CHECK: vrcpps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] + vrcpps %ymm11, %ymm12 + +// CHECK: vrcpps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x53,0x20] + vrcpps (%rax), %ymm12 + +// CHECK: vandps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] + vandps %ymm12, %ymm14, %ymm11 + +// CHECK: vandpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] + vandpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] + vorps 
%ymm12, %ymm14, %ymm11 + +// CHECK: vorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] + vorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] + vxorps %ymm12, %ymm14, %ymm11 + +// CHECK: vxorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] + vxorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] + vandnps %ymm12, %ymm14, %ymm11 + +// CHECK: vandnpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] + vandnpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vcvtps2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] + vcvtps2pd %xmm13, %ymm12 + +// CHECK: vcvtps2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] + vcvtps2pd (%rax), %ymm12 + +// CHECK: vcvtdq2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] + vcvtdq2pd %xmm13, %ymm12 + +// CHECK: vcvtdq2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] + vcvtdq2pd (%rax), %ymm12 + +// CHECK: vcvtdq2ps %ymm12, %ymm10 +// 
CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] + vcvtdq2ps %ymm12, %ymm10 + +// CHECK: vcvtdq2ps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] + vcvtdq2ps (%rax), %ymm12 + +// CHECK: vcvtps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] + vcvtps2dq %ymm12, %ymm10 + +// CHECK: vcvtps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] + vcvtps2dq (%rax), %ymm10 + +// CHECK: vcvttps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] + vcvttps2dq %ymm12, %ymm10 + +// CHECK: vcvttps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7e,0x5b,0x10] + vcvttps2dq (%rax), %ymm10 + +// CHECK: vcvttpd2dq %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dq %xmm11, %xmm10 + +// CHECK: vcvttpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] + vcvttpd2dq %ymm12, %xmm10 + +// CHECK: vcvttpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dqx %xmm11, %xmm10 + +// CHECK: vcvttpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0xe6,0x18] + vcvttpd2dqx (%rax), %xmm11 + +// CHECK: vcvttpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] + vcvttpd2dqy %ymm12, %xmm11 + +// CHECK: vcvttpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] + vcvttpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2ps %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] + vcvtpd2ps %ymm12, %xmm10 + +// CHECK: vcvtpd2psx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] + vcvtpd2psx %xmm11, %xmm10 + +// CHECK: vcvtpd2psx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5a,0x18] + vcvtpd2psx (%rax), %xmm11 + +// CHECK: vcvtpd2psy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] + vcvtpd2psy %ymm12, %xmm11 + +// CHECK: vcvtpd2psy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] + vcvtpd2psy (%rax), %xmm11 + +// CHECK: vcvtpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] + vcvtpd2dq %ymm12, %xmm10 + +// 
CHECK: vcvtpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] + vcvtpd2dqy %ymm12, %xmm11 + +// CHECK: vcvtpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] + vcvtpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] + vcvtpd2dqx %xmm11, %xmm10 + +// CHECK: vcvtpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] + vcvtpd2dqx (%rax), %xmm11 + +// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] + vcmpeqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] + vcmpleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] + vcmpltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] + vcmpneqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] + vcmpnleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] + vcmpnltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] + vcmpordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] + vcmpunordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// 
CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] + vcmpeqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] + vcmplepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] + vcmpltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] + vcmpneqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] + vcmpnlepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] + vcmpnltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] + vcmpordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] + vcmpunordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd 
-4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] + vcmpeq_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] + vcmpngeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] + vcmpngtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] + vcmpfalseps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] + vcmpneq_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] + vcmpgeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $14, %ymm11, %ymm12, 
%ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] + vcmpgtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] + vcmptrueps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] + vcmpeq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] + vcmplt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] + vcmple_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] + vcmpunord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] + vcmpneq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] + vcmpnlt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] + vcmpnle_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] + vcmpord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] + vcmpeq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] + vcmpnge_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] + vcmpngt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] + vcmpfalse_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] + 
vcmpneq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] + vcmpge_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] + vcmpgt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] + vcmptrue_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb] + vaddsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x27,0xd0,0x20] + vaddsubps (%rax), %ymm11, %ymm12 + +// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb] + vaddsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubpd (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x25,0xd0,0x20] + vaddsubpd (%rax), %ymm11, %ymm12 + +// CHECK: vhaddps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb] + vhaddps %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7c,0x28] + vhaddps (%rax), %ymm12, %ymm13 + +// CHECK: vhaddpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb] + vhaddpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7c,0x28] + vhaddpd (%rax), %ymm12, %ymm13 + +// CHECK: vhsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb] + vhsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7d,0x28] + vhsubps (%rax), %ymm12, %ymm13 + +// CHECK: vhsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb] + vhsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7d,0x28] + vhsubpd (%rax), %ymm12, %ymm13 + +// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11 +// CHECK: 
encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03] + vblendps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03] + vblendps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03] + vblendpd $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03] + vblendpd $3, (%rax), %ymm10, %ymm11 + +// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03] + vdpps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vdpps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03] + vdpps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vbroadcastf128 (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20] + vbroadcastf128 (%rax), %ymm12 + +// CHECK: vbroadcastsd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20] + vbroadcastsd (%rax), %ymm12 + +// CHECK: vbroadcastss (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20] + vbroadcastss (%rax), %xmm12 + +// CHECK: vbroadcastss (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20] + vbroadcastss (%rax), %ymm12 + +// CHECK: vinsertf128 $7, %xmm12, %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07] + vinsertf128 $7, %xmm12, %ymm12, %ymm10 + +// CHECK: vinsertf128 $7, (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07] + vinsertf128 $7, (%rax), %ymm12, %ymm10 + +// CHECK: vextractf128 $7, %ymm12, %xmm12 +// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07] + vextractf128 $7, %ymm12, %xmm12 + +// CHECK: vextractf128 $7, %ymm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07] + vextractf128 $7, %ymm12, (%rax) + +// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20] + vmaskmovpd %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax) +// CHECK: 
encoding: [0xc4,0x62,0x2d,0x2f,0x20] + vmaskmovpd %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10] + vmaskmovpd (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10] + vmaskmovpd (%rax), %ymm12, %ymm10 + +// CHECK: vmaskmovps %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20] + vmaskmovps %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovps %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20] + vmaskmovps %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovps (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10] + vmaskmovps (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovps (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10] + vmaskmovps (%rax), %ymm12, %ymm10 + +// CHECK: vpermilps $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07] + vpermilps $7, %xmm11, %xmm10 + +// CHECK: vpermilps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07] + vpermilps $7, %ymm10, %ymm11 + +// CHECK: vpermilps $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07] + vpermilps $7, (%rax), %xmm10 + +// CHECK: vpermilps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07] + vpermilps $7, (%rax), %ymm10 + +// CHECK: vpermilps %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb] + vpermilps %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilps %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb] + vpermilps %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilps (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28] + vpermilps (%rax), %xmm10, %xmm13 + +// CHECK: vpermilps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18] + vpermilps (%rax), %ymm10, %ymm11 + +// CHECK: vpermilpd $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07] + vpermilpd $7, %xmm11, 
%xmm10 + +// CHECK: vpermilpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07] + vpermilpd $7, %ymm10, %ymm11 + +// CHECK: vpermilpd $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07] + vpermilpd $7, (%rax), %xmm10 + +// CHECK: vpermilpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07] + vpermilpd $7, (%rax), %ymm10 + +// CHECK: vpermilpd %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb] + vpermilpd %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilpd %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb] + vpermilpd %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilpd (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28] + vpermilpd (%rax), %xmm10, %xmm13 + +// CHECK: vpermilpd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18] + vpermilpd (%rax), %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07] + vperm2f128 $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07] + vperm2f128 $7, (%rax), %ymm10, %ymm11 + +// CHECK: vcvtsd2si %xmm8, %r8d +// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0] + vcvtsd2si %xmm8, %r8d + +// CHECK: vcvtsd2si (%rcx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%rcx), %ecx + +// CHECK: vcvtss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc] + vcvtss2si %xmm4, %rcx + +// CHECK: vcvtss2si (%rcx), %r8 +// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01] + vcvtss2si (%rcx), %r8 + +// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8] + vcvtsi2sdl %r8d, %xmm8, %xmm15 + +// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15 +// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00] + vcvtsi2sdl (%rbp), %xmm8, %xmm15 + +// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1] + vcvtsi2sdq %rcx, %xmm4, %xmm6 + +// 
CHECK: vcvtsi2sdq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31] + vcvtsi2sdq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1] + vcvtsi2ssq %rcx, %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31] + vcvtsi2ssq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvttsd2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc] + vcvttsd2si %xmm4, %rcx + +// CHECK: vcvttsd2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09] + vcvttsd2si (%rcx), %rcx + +// CHECK: vcvttss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc] + vcvttss2si %xmm4, %rcx + +// CHECK: vcvttss2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09] + vcvttss2si (%rcx), %rcx + +// CHECK: vlddqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0xf0,0x20] + vlddqu (%rax), %ymm12 + +// CHECK: vmovddup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4] + vmovddup %ymm12, %ymm10 + +// CHECK: vmovddup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0x12,0x20] + vmovddup (%rax), %ymm12 + +// CHECK: vmovdqa %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4] + vmovdqa %ymm12, %ymm10 + +// CHECK: vmovdqa %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x7f,0x20] + vmovdqa %ymm12, (%rax) + +// CHECK: vmovdqa (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x6f,0x20] + vmovdqa (%rax), %ymm12 + +// CHECK: vmovdqu %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4] + vmovdqu %ymm12, %ymm10 + +// CHECK: vmovdqu %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7e,0x7f,0x20] + vmovdqu %ymm12, (%rax) + +// CHECK: vmovdqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x6f,0x20] + vmovdqu (%rax), %ymm12 + +// CHECK: vmovshdup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4] + vmovshdup %ymm12, %ymm10 + +// CHECK: vmovshdup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x16,0x20] + vmovshdup (%rax), %ymm12 + +// CHECK: 
vmovsldup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4] + vmovsldup %ymm12, %ymm10 + +// CHECK: vmovsldup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x12,0x20] + vmovsldup (%rax), %ymm12 + +// CHECK: vptest %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4] + vptest %ymm12, %ymm10 + +// CHECK: vptest (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20] + vptest (%rax), %ymm12 + +// CHECK: vroundpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07] + vroundpd $7, %ymm10, %ymm11 + +// CHECK: vroundpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07] + vroundpd $7, (%rax), %ymm10 + +// CHECK: vroundps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07] + vroundps $7, %ymm10, %ymm11 + +// CHECK: vroundps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07] + vroundps $7, (%rax), %ymm10 + +// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07] + vshufpd $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07] + vshufpd $7, (%rax), %ymm10, %ymm11 + +// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07] + vshufps $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufps $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07] + vshufps $7, (%rax), %ymm10, %ymm11 + +// CHECK: vtestpd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4] + vtestpd %xmm12, %xmm10 + +// CHECK: vtestpd %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4] + vtestpd %ymm12, %ymm10 + +// CHECK: vtestpd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20] + vtestpd (%rax), %xmm12 + +// CHECK: vtestpd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20] + vtestpd (%rax), %ymm12 + +// CHECK: vtestps %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4] + vtestps %xmm12, 
%xmm10 + +// CHECK: vtestps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4] + vtestps %ymm12, %ymm10 + +// CHECK: vtestps (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20] + vtestps (%rax), %xmm12 + +// CHECK: vtestps (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20] + vtestps (%rax), %ymm12 + +// CHECK: vextractps $10, %xmm8, %r8 +// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a] + vextractps $10, %xmm8, %r8 + +// CHECK: vextractps $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07] + vextractps $7, %xmm4, %rcx + +// CHECK: vmovd %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1] + vmovd %xmm4, %rcx + +// CHECK: vmovmskpd %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0x50,0xcc] + vmovmskpd %xmm4, %rcx + +// CHECK: vmovmskpd %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfd,0x50,0xcc] + vmovmskpd %ymm4, %rcx + +// CHECK: vmovmskps %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf8,0x50,0xcc] + vmovmskps %xmm4, %rcx + +// CHECK: vmovmskps %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfc,0x50,0xcc] + vmovmskps %ymm4, %rcx + +// CHECK: vpextrb $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07] + vpextrb $7, %xmm4, %rcx + +// CHECK: vpinsrw $7, %r8, %xmm15, %xmm8 +// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07] + vpinsrw $7, %r8, %xmm15, %xmm8 + +// CHECK: vpinsrw $7, %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07] + vpinsrw $7, %rcx, %xmm4, %xmm6 + +// CHECK: vpmovmskb %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc] + vpmovmskb %xmm4, %rcx + +// CHECK: vblendvpd %ymm11, 57005(%rax,%riz), %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0] + vblendvpd %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 0fce5925cba92..f45b0a23d5e81 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -4,7 +4,7 
@@ // CHECK: callw 42 // CHECK: encoding: [0x66,0xe8,A,A] callw 42 - + // rdar://8127102 // CHECK: movq %gs:(%rdi), %rax // CHECK: encoding: [0x65,0x48,0x8b,0x07] @@ -114,2889 +114,29 @@ movd %mm1, %rdx // CHECK: encoding: [0x0f,0x7e,0xca] movd %mm1, %edx -// CHECK: vaddss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] -vaddss %xmm8, %xmm9, %xmm10 - -// CHECK: vmulss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] -vmulss %xmm8, %xmm9, %xmm10 - -// CHECK: vsubss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] -vsubss %xmm8, %xmm9, %xmm10 - -// CHECK: vdivss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] -vdivss %xmm8, %xmm9, %xmm10 - -// CHECK: vaddsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] -vaddsd %xmm8, %xmm9, %xmm10 - -// CHECK: vmulsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] -vmulsd %xmm8, %xmm9, %xmm10 - -// CHECK: vsubsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] -vsubsd %xmm8, %xmm9, %xmm10 - -// CHECK: vdivsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] -vdivsd %xmm8, %xmm9, %xmm10 - -// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc] -vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] -vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] -vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] -vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] -vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: 
[0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] -vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] -vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] -vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] -vaddps %xmm10, %xmm11, %xmm15 - -// CHECK: vsubps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] -vsubps %xmm10, %xmm11, %xmm15 - -// CHECK: vmulps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] -vmulps %xmm10, %xmm11, %xmm15 - -// CHECK: vdivps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] -vdivps %xmm10, %xmm11, %xmm15 - -// CHECK: vaddpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] -vaddpd %xmm10, %xmm11, %xmm15 - -// CHECK: vsubpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] -vsubpd %xmm10, %xmm11, %xmm15 - -// CHECK: vmulpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] -vmulpd %xmm10, %xmm11, %xmm15 - -// CHECK: vdivpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa] -vdivpd %xmm10, %xmm11, %xmm15 - -// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] -vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] -vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] -vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] -vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: 
[0xc5,0x29,0x58,0x5c,0xd9,0xfc] -vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] -vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] -vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] -vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmaxss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] - vmaxss %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] - vmaxsd %xmm10, %xmm14, %xmm12 - -// CHECK: vminss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] - vminss %xmm10, %xmm14, %xmm12 - -// CHECK: vminsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] - vminsd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] - vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] - vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] - vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] - vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] - vmaxps %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] - vmaxpd %xmm10, %xmm14, %xmm12 - -// CHECK: vminps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] - vminps %xmm10, %xmm14, %xmm12 - -// CHECK: vminpd %xmm10, %xmm14, %xmm12 -// 
CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] - vminpd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] - vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] - vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] - vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] - vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] - vandps %xmm10, %xmm14, %xmm12 - -// CHECK: vandpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] - vandpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2] - vorps %xmm10, %xmm14, %xmm12 - -// CHECK: vorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] - vorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] - vxorps %xmm10, %xmm14, %xmm12 - -// CHECK: vxorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] - vxorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vxorps 
-4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] - vandnps %xmm10, %xmm14, %xmm12 - -// CHECK: vandnpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] - vandnpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] - vmovss -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovss %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] - vmovss %xmm14, %xmm10, %xmm15 - -// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] - vmovsd -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovsd %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe] - vmovsd %xmm14, %xmm10, %xmm15 - // rdar://7840289 // CHECK: pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A] // CHECK: fixup A - offset: 5, value: CPI1_0-4 pshufb CPI1_0(%rip), %xmm1 -// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] - vunpckhps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] - vunpckhpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] - vunpcklps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] - 
vunpcklpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] - vcmpps $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] - vcmpps $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] - vcmpps $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] - vcmppd $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] - vcmppd $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] - vcmppd $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] - vshufps $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] - vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] - vshufpd $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] 
- vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] - vcmpeqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] - vcmpleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] - vcmpltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] - vcmpneqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] - vcmpnleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] - vcmpnltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] - vcmpordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] - vcmpunordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: 
[0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] - vcmpeqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] - vcmplepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] - vcmpltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] - vcmpneqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] - vcmpnlepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] - vcmpnltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] - vcmpordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] - vcmpunordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, 
%xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] - vcmpeqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] - vcmpless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] - vcmpltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] - vcmpneqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] - vcmpnless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] - vcmpnltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] - vcmpordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] - vcmpunordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $2, 
-4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] - vcmpeqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] - vcmplesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] - vcmpltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] - vcmpneqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] - vcmpnlesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] - vcmpnltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] - vcmpordsd 
%xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] - vcmpunordsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vucomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] - vucomiss %xmm11, %xmm12 - -// CHECK: vucomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2e,0x20] - vucomiss (%rax), %xmm12 - -// CHECK: vcomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] - vcomiss %xmm11, %xmm12 - -// CHECK: vcomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2f,0x20] - vcomiss (%rax), %xmm12 - -// CHECK: vucomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] - vucomisd %xmm11, %xmm12 - -// CHECK: vucomisd (%rax), %xmm12 
-// CHECK: encoding: [0xc5,0x79,0x2e,0x20] - vucomisd (%rax), %xmm12 - -// CHECK: vcomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] - vcomisd %xmm11, %xmm12 - -// CHECK: vcomisd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x2f,0x20] - vcomisd (%rax), %xmm12 - -// CHECK: vcvttss2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%rcx), %eax - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvttsd2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%rcx), %eax - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vmovaps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x28,0x20] - vmovaps (%rax), %xmm12 - -// CHECK: vmovaps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] - vmovaps %xmm11, %xmm12 - -// CHECK: vmovaps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x29,0x18] - vmovaps %xmm11, (%rax) - -// CHECK: vmovapd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x28,0x20] - vmovapd (%rax), %xmm12 - -// CHECK: vmovapd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3] - vmovapd %xmm11, %xmm12 - -// CHECK: vmovapd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x29,0x18] - vmovapd %xmm11, (%rax) - -// CHECK: vmovups (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x10,0x20] - vmovups (%rax), %xmm12 - -// CHECK: vmovups %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] - vmovups %xmm11, %xmm12 - -// CHECK: vmovups %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x11,0x18] - vmovups %xmm11, (%rax) - -// CHECK: vmovupd (%rax), %xmm12 -// CHECK: encoding: 
[0xc5,0x79,0x10,0x20] - vmovupd (%rax), %xmm12 - -// CHECK: vmovupd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] - vmovupd %xmm11, %xmm12 - -// CHECK: vmovupd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x11,0x18] - vmovupd %xmm11, (%rax) - -// CHECK: vmovlps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x13,0x18] - vmovlps %xmm11, (%rax) - -// CHECK: vmovlps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x12,0x28] - vmovlps (%rax), %xmm12, %xmm13 - -// CHECK: vmovlpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x13,0x18] - vmovlpd %xmm11, (%rax) - -// CHECK: vmovlpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x12,0x28] - vmovlpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovhps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x17,0x18] - vmovhps %xmm11, (%rax) - -// CHECK: vmovhps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x16,0x28] - vmovhps (%rax), %xmm12, %xmm13 - -// CHECK: vmovhpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x17,0x18] - vmovhpd %xmm11, (%rax) - -// CHECK: vmovhpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x16,0x28] - vmovhpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] - vmovlhps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] - vmovhlps %xmm11, %xmm12, %xmm13 - -// CHECK: vcvtss2sil %xmm11, %eax -// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] - vcvtss2si %xmm11, %eax - -// CHECK: vcvtss2sil (%rax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%rax), %ebx - -// CHECK: vcvtdq2ps %xmm10, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] - vcvtdq2ps %xmm10, %xmm12 - -// CHECK: vcvtdq2ps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x5b,0x20] - vcvtdq2ps (%rax), %xmm12 - -// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] - vcvtsd2ss %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtsd2ss 
(%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x13,0x5a,0x10] - vcvtsd2ss (%rax), %xmm13, %xmm10 - -// CHECK: vcvtps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] - vcvtps2dq %xmm12, %xmm11 - -// CHECK: vcvtps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5b,0x18] - vcvtps2dq (%rax), %xmm11 - -// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] - vcvtss2sd %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x12,0x5a,0x10] - vcvtss2sd (%rax), %xmm13, %xmm10 - -// CHECK: vcvtdq2ps %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] - vcvtdq2ps %xmm13, %xmm10 - -// CHECK: vcvtdq2ps (%ecx), %xmm13 -// CHECK: encoding: [0xc5,0x78,0x5b,0x29] - vcvtdq2ps (%ecx), %xmm13 - -// CHECK: vcvttps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] - vcvttps2dq %xmm12, %xmm11 - -// CHECK: vcvttps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] - vcvttps2dq (%rax), %xmm11 - -// CHECK: vcvtps2pd %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] - vcvtps2pd %xmm12, %xmm11 - -// CHECK: vcvtps2pd (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x78,0x5a,0x18] - vcvtps2pd (%rax), %xmm11 - -// CHECK: vcvtpd2ps %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] - vcvtpd2ps %xmm12, %xmm11 - -// CHECK: vsqrtpd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] - vsqrtpd %xmm11, %xmm12 - -// CHECK: vsqrtpd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x51,0x20] - vsqrtpd (%rax), %xmm12 - -// CHECK: vsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] - vsqrtps %xmm11, %xmm12 - -// CHECK: vsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x51,0x20] - vsqrtps (%rax), %xmm12 - -// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] - vsqrtsd %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x51,0x10] - 
vsqrtsd (%rax), %xmm12, %xmm10 - -// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] - vsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x51,0x10] - vsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] - vrsqrtps %xmm11, %xmm12 - -// CHECK: vrsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x52,0x20] - vrsqrtps (%rax), %xmm12 - -// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] - vrsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x52,0x10] - vrsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrcpps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] - vrcpps %xmm11, %xmm12 - -// CHECK: vrcpps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x53,0x20] - vrcpps (%rax), %xmm12 - -// CHECK: vrcpss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] - vrcpss %xmm11, %xmm12, %xmm10 - -// CHECK: vrcpss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x53,0x10] - vrcpss (%rax), %xmm12, %xmm10 - -// CHECK: vmovntdq %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0xe7,0x18] - vmovntdq %xmm11, (%rax) - -// CHECK: vmovntpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x2b,0x18] - vmovntpd %xmm11, (%rax) - -// CHECK: vmovntps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x2b,0x18] - vmovntps %xmm11, (%rax) - -// CHECK: vldmxcsr -4(%rip) -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] - vldmxcsr -4(%rip) - -// CHECK: vstmxcsr -4(%rsp) -// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] - vstmxcsr -4(%rsp) - -// CHECK: vpsubb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] - vpsubb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf8,0x28] - vpsubb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubw %xmm11, 
%xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] - vpsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf9,0x28] - vpsubw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] - vpsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfa,0x28] - vpsubd (%rax), %xmm12, %xmm13 - -// CHECK: vpsubq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] - vpsubq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfb,0x28] - vpsubq (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] - vpsubsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe8,0x28] - vpsubsb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] - vpsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe9,0x28] - vpsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] - vpsubusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd8,0x28] - vpsubusb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] - vpsubusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd9,0x28] - vpsubusw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] - vpaddb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfc,0x28] - vpaddb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: 
[0xc4,0x41,0x19,0xfd,0xeb] - vpaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfd,0x28] - vpaddw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] - vpaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfe,0x28] - vpaddd (%rax), %xmm12, %xmm13 - -// CHECK: vpaddq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] - vpaddq %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd4,0x28] - vpaddq (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] - vpaddsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xec,0x28] - vpaddsb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] - vpaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xed,0x28] - vpaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] - vpaddusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdc,0x28] - vpaddusb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb] - vpaddusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdd,0x28] - vpaddusw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] - vpmulhuw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe4,0x28] - vpmulhuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] - vpmulhw 
%xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe5,0x28] - vpmulhw (%rax), %xmm12, %xmm13 - -// CHECK: vpmullw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] - vpmullw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmullw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd5,0x28] - vpmullw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] - vpmuludq %xmm11, %xmm12, %xmm13 - -// CHECK: vpmuludq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf4,0x28] - vpmuludq (%rax), %xmm12, %xmm13 - -// CHECK: vpavgb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] - vpavgb %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe0,0x28] - vpavgb (%rax), %xmm12, %xmm13 - -// CHECK: vpavgw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] - vpavgw %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe3,0x28] - vpavgw (%rax), %xmm12, %xmm13 - -// CHECK: vpminsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] - vpminsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpminsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xea,0x28] - vpminsw (%rax), %xmm12, %xmm13 - -// CHECK: vpminub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] - vpminub %xmm11, %xmm12, %xmm13 - -// CHECK: vpminub (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xda,0x28] - vpminub (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] - vpmaxsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xee,0x28] - vpmaxsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] - vpmaxub %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxub 
(%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xde,0x28] - vpmaxub (%rax), %xmm12, %xmm13 - -// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] - vpsadbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsadbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf6,0x28] - vpsadbw (%rax), %xmm12, %xmm13 - -// CHECK: vpsllw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] - vpsllw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf1,0x28] - vpsllw (%rax), %xmm12, %xmm13 - -// CHECK: vpslld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] - vpslld %xmm11, %xmm12, %xmm13 - -// CHECK: vpslld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf2,0x28] - vpslld (%rax), %xmm12, %xmm13 - -// CHECK: vpsllq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] - vpsllq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf3,0x28] - vpsllq (%rax), %xmm12, %xmm13 - -// CHECK: vpsraw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] - vpsraw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsraw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe1,0x28] - vpsraw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrad %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] - vpsrad %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrad (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe2,0x28] - vpsrad (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] - vpsrlw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd1,0x28] - vpsrlw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] - vpsrld %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd2,0x28] 
- vpsrld (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] - vpsrlq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd3,0x28] - vpsrlq (%rax), %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpslldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] - vpslldq $10, %xmm12, %xmm13 - -// CHECK: vpsllq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] - vpsllq $10, %xmm12, %xmm13 - -// CHECK: vpsllw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] - vpsllw $10, %xmm12, %xmm13 - -// CHECK: vpsrad $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] - vpsrad $10, %xmm12, %xmm13 - -// CHECK: vpsraw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] - vpsraw $10, %xmm12, %xmm13 - -// CHECK: vpsrld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] - vpsrld $10, %xmm12, %xmm13 - -// CHECK: vpsrldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] - vpsrldq $10, %xmm12, %xmm13 - -// CHECK: vpsrlq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] - vpsrlq $10, %xmm12, %xmm13 - -// CHECK: vpsrlw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] - vpsrlw $10, %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpand %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] - vpand %xmm11, %xmm12, %xmm13 - -// CHECK: vpand (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdb,0x28] - vpand (%rax), %xmm12, %xmm13 - -// CHECK: vpor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] - vpor %xmm11, %xmm12, %xmm13 - -// CHECK: vpor 
(%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xeb,0x28] - vpor (%rax), %xmm12, %xmm13 - -// CHECK: vpxor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] - vpxor %xmm11, %xmm12, %xmm13 - -// CHECK: vpxor (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xef,0x28] - vpxor (%rax), %xmm12, %xmm13 - -// CHECK: vpandn %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] - vpandn %xmm11, %xmm12, %xmm13 - -// CHECK: vpandn (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdf,0x28] - vpandn (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] - vpcmpeqb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x74,0x28] - vpcmpeqb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] - vpcmpeqw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x75,0x28] - vpcmpeqw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] - vpcmpeqd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x76,0x28] - vpcmpeqd (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] - vpcmpgtb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x64,0x28] - vpcmpgtb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] - vpcmpgtw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x65,0x28] - vpcmpgtw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] - vpcmpgtd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 -// 
CHECK: encoding: [0xc5,0x19,0x66,0x28] - vpcmpgtd (%rax), %xmm12, %xmm13 - -// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] - vpacksswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpacksswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x63,0x28] - vpacksswb (%rax), %xmm12, %xmm13 - -// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] - vpackssdw %xmm11, %xmm12, %xmm13 - -// CHECK: vpackssdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6b,0x28] - vpackssdw (%rax), %xmm12, %xmm13 - -// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] - vpackuswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpackuswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x67,0x28] - vpackuswb (%rax), %xmm12, %xmm13 - -// CHECK: vpshufd $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] - vpshufd $4, %xmm12, %xmm13 - -// CHECK: vpshufd $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] - vpshufd $4, (%rax), %xmm13 - -// CHECK: vpshufhw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] - vpshufhw $4, %xmm12, %xmm13 - -// CHECK: vpshufhw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] - vpshufhw $4, (%rax), %xmm13 - -// CHECK: vpshuflw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] - vpshuflw $4, %xmm12, %xmm13 - -// CHECK: vpshuflw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] - vpshuflw $4, (%rax), %xmm13 - -// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] - vpunpcklbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x60,0x28] - vpunpcklbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] - vpunpcklwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 -// 
CHECK: encoding: [0xc5,0x19,0x61,0x28] - vpunpcklwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] - vpunpckldq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x62,0x28] - vpunpckldq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] - vpunpcklqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6c,0x28] - vpunpcklqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] - vpunpckhbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x68,0x28] - vpunpckhbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] - vpunpckhwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x69,0x28] - vpunpckhwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] - vpunpckhdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6a,0x28] - vpunpckhdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] - vpunpckhqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6d,0x28] - vpunpckhqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm12, %xmm13 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm13 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, 
%eax - -// CHECK: vpmovmskb %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] - vpmovmskb %xmm12, %eax - -// CHECK: vmaskmovdqu %xmm14, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] - vmaskmovdqu %xmm14, %xmm15 - -// CHECK: vmovd %eax, %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] - vmovd %eax, %xmm14 - -// CHECK: vmovd (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0x30] - vmovd (%rax), %xmm14 - -// CHECK: vmovd %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0x7e,0x30] - vmovd %xmm14, (%rax) - -// CHECK: vmovd %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovd %rax, %xmm14 - -// CHECK: vmovq %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0xd6,0x30] - vmovq %xmm14, (%rax) - -// CHECK: vmovq %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] - vmovq %xmm14, %xmm12 - -// CHECK: vmovq (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] - vmovq (%rax), %xmm14 - -// CHECK: vmovq %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovq %rax, %xmm14 - -// CHECK: vmovq %xmm14, %rax -// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] - vmovq %xmm14, %rax - -// CHECK: vcvtpd2dq %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] - vcvtpd2dq %xmm11, %xmm12 - -// CHECK: vcvtdq2pd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] - vcvtdq2pd %xmm11, %xmm12 - -// CHECK: vcvtdq2pd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0xe6,0x20] - vcvtdq2pd (%rax), %xmm12 - -// CHECK: vmovshdup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] - vmovshdup %xmm11, %xmm12 - -// CHECK: vmovshdup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x16,0x20] - vmovshdup (%rax), %xmm12 - -// CHECK: vmovsldup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] - vmovsldup %xmm11, %xmm12 - -// CHECK: vmovsldup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x12,0x20] - vmovsldup (%rax), %xmm12 - -// CHECK: vmovddup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] - 
vmovddup %xmm11, %xmm12 - -// CHECK: vmovddup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7b,0x12,0x20] - vmovddup (%rax), %xmm12 - -// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] - vaddsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubps (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0xd0,0x20] - vaddsubps (%rax), %xmm11, %xmm12 - -// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] - vaddsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x21,0xd0,0x20] - vaddsubpd (%rax), %xmm11, %xmm12 - -// CHECK: vhaddps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] - vhaddps %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] - vhaddps (%rax), %xmm12, %xmm13 - -// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] - vhaddpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7c,0x28] - vhaddpd (%rax), %xmm12, %xmm13 - -// CHECK: vhsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] - vhsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] - vhsubps (%rax), %xmm12, %xmm13 - -// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] - vhsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7d,0x28] - vhsubpd (%rax), %xmm12, %xmm13 - -// CHECK: vpabsb %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] - vpabsb %xmm11, %xmm12 - -// CHECK: vpabsb (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] - vpabsb (%rax), %xmm12 - -// CHECK: vpabsw %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] - vpabsw %xmm11, %xmm12 - -// CHECK: vpabsw (%rax), %xmm12 -// CHECK: encoding: 
[0xc4,0x62,0x79,0x1d,0x20] - vpabsw (%rax), %xmm12 - -// CHECK: vpabsd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] - vpabsd %xmm11, %xmm12 - -// CHECK: vpabsd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] - vpabsd (%rax), %xmm12 - -// CHECK: vphaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] - vphaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] - vphaddw (%rax), %xmm12, %xmm13 - -// CHECK: vphaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] - vphaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] - vphaddd (%rax), %xmm12, %xmm13 - -// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] - vphaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] - vphaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] - vphsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] - vphsubw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] - vphsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] - vphsubd (%rax), %xmm12, %xmm13 - -// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] - vphsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] - vphsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] - vpmaddubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] 
- vpmaddubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpshufb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] - vpshufb %xmm11, %xmm12, %xmm13 - -// CHECK: vpshufb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] - vpshufb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] - vpsignb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] - vpsignb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] - vpsignw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] - vpsignw (%rax), %xmm12, %xmm13 - -// CHECK: vpsignd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] - vpsignd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] - vpsignd (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] - vpmulhrsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] - vpmulhrsw (%rax), %xmm12, %xmm13 - -// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] - vpalignr $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] - vpalignr $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] - vroundsd $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] - vroundsd $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] - vroundss $7, %xmm11, %xmm12, %xmm13 - -// 
CHECK: vroundss $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] - vroundss $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundpd $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] - vroundpd $7, %xmm12, %xmm13 - -// CHECK: vroundpd $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] - vroundpd $7, (%rax), %xmm13 - -// CHECK: vroundps $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] - vroundps $7, %xmm12, %xmm13 - -// CHECK: vroundps $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] - vroundps $7, (%rax), %xmm13 - -// CHECK: vphminposuw %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] - vphminposuw %xmm12, %xmm13 - -// CHECK: vphminposuw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] - vphminposuw (%rax), %xmm12 - -// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] - vpackusdw %xmm12, %xmm13, %xmm11 - -// CHECK: vpackusdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] - vpackusdw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] - vpcmpeqq %xmm12, %xmm13, %xmm11 - -// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] - vpcmpeqq (%rax), %xmm12, %xmm13 - -// CHECK: vpminsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] - vpminsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] - vpminsb (%rax), %xmm12, %xmm13 - -// CHECK: vpminsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] - vpminsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] - vpminsd (%rax), %xmm12, %xmm13 - -// CHECK: vpminud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] - vpminud %xmm12, %xmm13, 
%xmm11 - -// CHECK: vpminud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] - vpminud (%rax), %xmm12, %xmm13 - -// CHECK: vpminuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] - vpminuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpminuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] - vpminuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] - vpmaxsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] - vpmaxsb (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] - vpmaxsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] - vpmaxsd (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] - vpmaxud %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] - vpmaxud (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] - vpmaxuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] - vpmaxuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuldq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] - vpmuldq %xmm12, %xmm13, %xmm11 - -// CHECK: vpmuldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] - vpmuldq (%rax), %xmm12, %xmm13 - -// CHECK: vpmulld %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] - vpmulld %xmm12, %xmm5, %xmm11 - -// CHECK: vpmulld (%rax), %xmm5, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] - vpmulld (%rax), %xmm5, %xmm13 - -// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] - vblendps $3, 
%xmm12, %xmm5, %xmm11 - -// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] - vblendps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] - vblendpd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] - vblendpd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] - vpblendw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] - vpblendw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] - vmpsadbw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] - vmpsadbw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] - vdpps $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] - vdpps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] - vdppd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdppd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] - vdppd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] - vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] - vblendvpd %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] - vblendvps %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, 
(%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] - vblendvps %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] - vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] - vpblendvb %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpmovsxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] - vpmovsxbw %xmm12, %xmm10 - -// CHECK: vpmovsxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] - vpmovsxbw (%rax), %xmm12 - -// CHECK: vpmovsxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] - vpmovsxwd %xmm12, %xmm10 - -// CHECK: vpmovsxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] - vpmovsxwd (%rax), %xmm12 - -// CHECK: vpmovsxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] - vpmovsxdq %xmm12, %xmm10 - -// CHECK: vpmovsxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] - vpmovsxdq (%rax), %xmm12 - -// CHECK: vpmovzxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] - vpmovzxbw %xmm12, %xmm10 - -// CHECK: vpmovzxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] - vpmovzxbw (%rax), %xmm12 - -// CHECK: vpmovzxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] - vpmovzxwd %xmm12, %xmm10 - -// CHECK: vpmovzxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] - vpmovzxwd (%rax), %xmm12 - -// CHECK: vpmovzxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] - vpmovzxdq %xmm12, %xmm10 - -// CHECK: vpmovzxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] - vpmovzxdq (%rax), %xmm12 - -// CHECK: vpmovsxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] - vpmovsxbq %xmm12, %xmm10 - -// CHECK: vpmovsxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] - vpmovsxbq (%rax), %xmm12 
- -// CHECK: vpmovzxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] - vpmovzxbq %xmm12, %xmm10 - -// CHECK: vpmovzxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] - vpmovzxbq (%rax), %xmm12 - -// CHECK: vpmovsxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] - vpmovsxbd %xmm12, %xmm10 - -// CHECK: vpmovsxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] - vpmovsxbd (%rax), %xmm12 - -// CHECK: vpmovsxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] - vpmovsxwq %xmm12, %xmm10 - -// CHECK: vpmovsxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] - vpmovsxwq (%rax), %xmm12 - -// CHECK: vpmovzxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] - vpmovzxbd %xmm12, %xmm10 - -// CHECK: vpmovzxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] - vpmovzxbd (%rax), %xmm12 - -// CHECK: vpmovzxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] - vpmovzxwq %xmm12, %xmm10 - -// CHECK: vpmovzxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] - vpmovzxwq (%rax), %xmm12 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, %eax - -// CHECK: vpextrw $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] - vpextrw $7, %xmm12, (%rax) - -// CHECK: vpextrd $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] - vpextrd $7, %xmm12, %eax - -// CHECK: vpextrd $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] - vpextrd $7, %xmm12, (%rax) - -// CHECK: vpextrb $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] - vpextrb $7, %xmm12, %eax - -// CHECK: vpextrb $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] - vpextrb $7, %xmm12, (%rax) - -// CHECK: vpextrq $7, %xmm12, %rcx -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] - vpextrq $7, %xmm12, %rcx - -// CHECK: vpextrq $7, 
%xmm12, (%rcx) -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] - vpextrq $7, %xmm12, (%rcx) - -// CHECK: vextractps $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] - vextractps $7, %xmm12, (%rax) - -// CHECK: vextractps $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] - vextractps $7, %xmm12, %eax - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] - vpinsrw $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] - vpinsrb $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] - vpinsrb $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] - vpinsrd $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] - vpinsrd $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] - vpinsrq $7, %rax, %xmm12, %xmm10 - -// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] - vpinsrq $7, (%rax), %xmm12, %xmm10 - -// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] - vinsertps $7, %xmm12, %xmm10, %xmm11 - -// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] - vinsertps $7, (%rax), %xmm10, %xmm11 - -// CHECK: vptest %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] - vptest %xmm12, %xmm10 - -// CHECK: vptest (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] - vptest (%rax), %xmm12 - -// CHECK: vmovntdqa (%rax), %xmm12 -// CHECK: encoding: 
[0xc4,0x62,0x79,0x2a,0x20] - vmovntdqa (%rax), %xmm12 - -// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] - vpcmpgtq %xmm12, %xmm10, %xmm11 - -// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] - vpcmpgtq (%rax), %xmm10, %xmm13 - -// CHECK: vpcmpistrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] - vpcmpistrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpistrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] - vpcmpistrm $7, (%rax), %xmm10 - -// CHECK: vpcmpestrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] - vpcmpestrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpestrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] - vpcmpestrm $7, (%rax), %xmm10 - -// CHECK: vpcmpistri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] - vpcmpistri $7, %xmm12, %xmm10 - -// CHECK: vpcmpistri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] - vpcmpistri $7, (%rax), %xmm10 - -// CHECK: vpcmpestri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] - vpcmpestri $7, %xmm12, %xmm10 - -// CHECK: vpcmpestri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] - vpcmpestri $7, (%rax), %xmm10 - -// CHECK: vaesimc %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] - vaesimc %xmm12, %xmm10 - -// CHECK: vaesimc (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] - vaesimc (%rax), %xmm12 - -// CHECK: vaesenc %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] - vaesenc %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenc (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] - vaesenc (%rax), %xmm10, %xmm13 - -// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] - vaesenclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenclast (%rax), %xmm10, %xmm13 -// CHECK: 
encoding: [0xc4,0x62,0x29,0xdd,0x28] - vaesenclast (%rax), %xmm10, %xmm13 - -// CHECK: vaesdec %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] - vaesdec %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdec (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] - vaesdec (%rax), %xmm10, %xmm13 - -// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] - vaesdeclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] - vaesdeclast (%rax), %xmm10, %xmm13 - -// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] - vaeskeygenassist $7, %xmm12, %xmm10 - -// CHECK: vaeskeygenassist $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] - vaeskeygenassist $7, (%rax), %xmm10 - -// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] - vcmpeq_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] - vcmpngeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] - vcmpngtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] - vcmpfalseps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] - vcmpneq_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] - vcmpgeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] - vcmpgtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] - vcmptrueps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $16, %xmm11, 
%xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] - vcmpeq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] - vcmplt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] - vcmple_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] - vcmpunord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] - vcmpneq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] - vcmpnlt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] - vcmpnle_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] - vcmpord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] - vcmpeq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] - vcmpnge_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a] - vcmpngt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] - vcmpfalse_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] - vcmpneq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] - vcmpge_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: 
[0xc4,0x41,0x18,0xc2,0xeb,0x1e] - vcmpgt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] - vcmptrue_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovaps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x28,0x20] - vmovaps (%rax), %ymm12 - -// CHECK: vmovaps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] - vmovaps %ymm11, %ymm12 - -// CHECK: vmovaps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x29,0x18] - vmovaps %ymm11, (%rax) - -// CHECK: vmovapd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x28,0x20] - vmovapd (%rax), %ymm12 - -// CHECK: vmovapd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] - vmovapd %ymm11, %ymm12 - -// CHECK: vmovapd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x29,0x18] - vmovapd %ymm11, (%rax) - -// CHECK: vmovups (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x10,0x20] - vmovups (%rax), %ymm12 - -// CHECK: vmovups %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] - vmovups %ymm11, %ymm12 - -// CHECK: vmovups %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x11,0x18] - vmovups %ymm11, (%rax) - -// CHECK: vmovupd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x10,0x20] - vmovupd (%rax), %ymm12 - -// CHECK: vmovupd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] - vmovupd %ymm11, %ymm12 - -// CHECK: vmovupd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x11,0x18] - vmovupd %ymm11, (%rax) - -// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] - vunpckhps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] - vunpckhpd %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] - vunpcklps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] - vunpcklpd %ymm11, %ymm12, %ymm4 - -// CHECK: 
vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vmovntdq %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] - vmovntdq %ymm11, (%rax) - -// CHECK: vmovntpd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] - vmovntpd %ymm11, (%rax) - -// CHECK: vmovntps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] - vmovntps %ymm11, (%rax) - -// CHECK: vmovmskps %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] - vmovmskps %xmm12, %eax - -// CHECK: vmovmskpd %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] - vmovmskpd %xmm12, %eax - -// CHECK: vmaxps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] - vmaxps %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] - vmaxpd %ymm12, %ymm4, %ymm6 - -// CHECK: vminps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4] - vminps %ymm12, %ymm4, %ymm6 - -// CHECK: vminpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] - vminpd %ymm12, %ymm4, %ymm6 - -// CHECK: vsubps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] - vsubps %ymm12, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] - vsubpd %ymm12, %ymm4, %ymm6 - -// CHECK: vdivps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] - vdivps %ymm12, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm12, %ymm4, 
%ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] - vdivpd %ymm12, %ymm4, %ymm6 - -// CHECK: vaddps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] - vaddps %ymm12, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] - vaddpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmulps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] - vmulps %ymm12, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] - vmulpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%rax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%rax), %ymm4, %ymm6 - -// CHECK: vminps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%rax), %ymm4, %ymm6 - -// CHECK: vminpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%rax), %ymm4, %ymm6 - -// CHECK: vsubps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%rax), %ymm4, %ymm6 - -// CHECK: vsubpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%rax), %ymm4, %ymm6 - -// CHECK: vdivps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%rax), %ymm4, %ymm6 - -// CHECK: vdivpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%rax), %ymm4, %ymm6 - -// CHECK: vaddps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%rax), %ymm4, %ymm6 - -// CHECK: vaddpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%rax), %ymm4, %ymm6 - -// CHECK: vmulps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%rax), %ymm4, %ymm6 - -// CHECK: vmulpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%rax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm11, %ymm12 -// CHECK: encoding: 
[0xc4,0x41,0x7d,0x51,0xe3] - vsqrtpd %ymm11, %ymm12 - -// CHECK: vsqrtpd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x51,0x20] - vsqrtpd (%rax), %ymm12 - -// CHECK: vsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] - vsqrtps %ymm11, %ymm12 - -// CHECK: vsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x51,0x20] - vsqrtps (%rax), %ymm12 - -// CHECK: vrsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] - vrsqrtps %ymm11, %ymm12 - -// CHECK: vrsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x52,0x20] - vrsqrtps (%rax), %ymm12 - -// CHECK: vrcpps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] - vrcpps %ymm11, %ymm12 - -// CHECK: vrcpps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x53,0x20] - vrcpps (%rax), %ymm12 - -// CHECK: vandps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] - vandps %ymm12, %ymm14, %ymm11 - -// CHECK: vandpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] - vandpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] - vorps %ymm12, %ymm14, %ymm11 - -// CHECK: vorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] - vorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] - vxorps %ymm12, %ymm14, %ymm11 - -// CHECK: 
vxorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] - vxorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] - vandnps %ymm12, %ymm14, %ymm11 - -// CHECK: vandnpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] - vandnpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vcvtps2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] - vcvtps2pd %xmm13, %ymm12 - -// CHECK: vcvtps2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] - vcvtps2pd (%rax), %ymm12 - -// CHECK: vcvtdq2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] - vcvtdq2pd %xmm13, %ymm12 - -// CHECK: vcvtdq2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] - vcvtdq2pd (%rax), %ymm12 - -// CHECK: vcvtdq2ps %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] - vcvtdq2ps %ymm12, %ymm10 - -// CHECK: vcvtdq2ps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] - vcvtdq2ps (%rax), %ymm12 - -// CHECK: vcvtps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] - vcvtps2dq %ymm12, %ymm10 - -// CHECK: vcvtps2dq (%rax), %ymm10 -// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] - vcvtps2dq (%rax), %ymm10 - -// CHECK: vcvttps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] - vcvttps2dq %ymm12, %ymm10 - -// CHECK: vcvttps2dq (%rax), %ymm10 -// CHECK: 
encoding: [0xc5,0x7e,0x5b,0x10] - vcvttps2dq (%rax), %ymm10 - -// CHECK: vcvttpd2dq %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dq %xmm11, %xmm10 - -// CHECK: vcvttpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] - vcvttpd2dq %ymm12, %xmm10 - -// CHECK: vcvttpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dqx %xmm11, %xmm10 - -// CHECK: vcvttpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0xe6,0x18] - vcvttpd2dqx (%rax), %xmm11 - -// CHECK: vcvttpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] - vcvttpd2dqy %ymm12, %xmm11 - -// CHECK: vcvttpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] - vcvttpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2ps %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] - vcvtpd2ps %ymm12, %xmm10 - -// CHECK: vcvtpd2psx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] - vcvtpd2psx %xmm11, %xmm10 - -// CHECK: vcvtpd2psx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5a,0x18] - vcvtpd2psx (%rax), %xmm11 - -// CHECK: vcvtpd2psy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] - vcvtpd2psy %ymm12, %xmm11 - -// CHECK: vcvtpd2psy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] - vcvtpd2psy (%rax), %xmm11 - -// CHECK: vcvtpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] - vcvtpd2dq %ymm12, %xmm10 - -// CHECK: vcvtpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] - vcvtpd2dqy %ymm12, %xmm11 - -// CHECK: vcvtpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] - vcvtpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] - vcvtpd2dqx %xmm11, %xmm10 - -// CHECK: vcvtpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] - vcvtpd2dqx (%rax), %xmm11 - -// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] - vcmpeqps %ymm11, 
%ymm12, %ymm13 - -// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] - vcmpleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] - vcmpltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] - vcmpneqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] - vcmpnleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] - vcmpnltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] - vcmpordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] - vcmpunordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: 
[0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] - vcmpeqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] - vcmplepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] - vcmpltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] - vcmpneqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] - vcmpnlepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] - vcmpnltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] - vcmpordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] - vcmpunordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, 
%ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] - vcmpeq_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] - vcmpngeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] - vcmpngtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] - vcmpfalseps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] - vcmpneq_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] - vcmpgeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] - vcmpgtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] - vcmptrueps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] - vcmpeq_osps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] - vcmplt_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] - vcmple_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] - vcmpunord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] - vcmpneq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] - vcmpnlt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] - vcmpnle_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] - vcmpord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] - vcmpeq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] - vcmpnge_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] - vcmpngt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] - vcmpfalse_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 57005(,%riz), %rbx +// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movq 57005(,%riz), %rbx -// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] - vcmpneq_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 48879(,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movq 48879(,%riz), %rax -// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] - vcmpge_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq -4(,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movq -4(,%riz,8), %rax -// CHECK: 
vcmpps $30, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] - vcmpgt_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x21] + movq (%rcx,%riz), %rax -// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] - vcmptrue_usps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe1] + movq (%rcx,%riz,8), %rax diff --git a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s new file mode 100644 index 0000000000000..d08a7329a09f1 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// 
CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] + vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// 
CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc] + vfmaddsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18] + vfmaddsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc] + vfmaddsub132ps %xmm12, %xmm10, 
%xmm11 + +// CHECK: vfmaddsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18] + vfmaddsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc] + vfmaddsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18] + vfmaddsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc] + vfmaddsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18] + vfmaddsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc] + vfmaddsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18] + vfmaddsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc] + vfmaddsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18] + vfmaddsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc] + vfmsubadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18] + vfmsubadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc] + vfmsubadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18] + vfmsubadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc] + vfmsubadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd (%rax), %xmm10, 
%xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18] + vfmsubadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc] + vfmsubadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18] + vfmsubadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc] + vfmsubadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18] + vfmsubadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc] + vfmsubadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18] + vfmsubadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc] + vfmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18] + vfmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc] + vfmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18] + vfmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc] + vfmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18] + vfmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc] + vfmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18] + vfmsub213ps (%rax), %xmm10, %xmm11 + +// 
CHECK: vfmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc] + vfmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18] + vfmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc] + vfmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18] + vfmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc] + vfnmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18] + vfnmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc] + vfnmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18] + vfnmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc] + vfnmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18] + vfnmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc] + vfnmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18] + vfnmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc] + vfnmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18] + vfnmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc] + vfnmadd231ps %xmm12, 
%xmm10, %xmm11 + +// CHECK: vfnmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18] + vfnmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc] + vfnmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18] + vfnmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc] + vfnmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18] + vfnmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc] + vfnmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18] + vfnmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc] + vfnmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18] + vfnmsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc] + vfnmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18] + vfnmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc] + vfnmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18] + vfnmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] 
+ vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc] + vfmaddsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18] + vfmaddsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc] + vfmaddsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18] + vfmaddsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: 
[0xc4,0x42,0xad,0xa6,0xdc] + vfmaddsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18] + vfmaddsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc] + vfmaddsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18] + vfmaddsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc] + vfmaddsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18] + vfmaddsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc] + vfmaddsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18] + vfmaddsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc] + vfmsubadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18] + vfmsubadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc] + vfmsubadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18] + vfmsubadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc] + vfmsubadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18] + vfmsubadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc] + vfmsubadd213ps %ymm12, 
%ymm10, %ymm11 + +// CHECK: vfmsubadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18] + vfmsubadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc] + vfmsubadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18] + vfmsubadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc] + vfmsubadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18] + vfmsubadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc] + vfmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18] + vfmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc] + vfmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18] + vfmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc] + vfmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18] + vfmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc] + vfmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18] + vfmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc] + vfmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: 
[0xc4,0x62,0xad,0xba,0x18] + vfmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc] + vfmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18] + vfmsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc] + vfnmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18] + vfnmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc] + vfnmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18] + vfnmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc] + vfnmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18] + vfnmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc] + vfnmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18] + vfnmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc] + vfnmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18] + vfnmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc] + vfnmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18] + vfnmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132pd %ymm12, %ymm10, %ymm11 
+// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc] + vfnmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18] + vfnmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc] + vfnmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18] + vfnmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc] + vfnmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18] + vfnmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc] + vfnmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18] + vfnmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc] + vfnmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18] + vfnmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc] + vfnmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18] + vfnmsub231ps (%rax), %ymm10, %ymm11 + diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s index 1858441870aca..9f94d8404f42c 100644 --- a/test/MC/AsmParser/X86/x86_64-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s @@ -72,9 +72,9 @@ stosl // Not moffset forms of moves, they are x86-32 only! 
rdar://7947184 -movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] -movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] -movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] +movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00] // CHECK: pushfq # encoding: [0x9c] pushf @@ -150,3 +150,10 @@ btq $0x01,%rdx // CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00] movl %gs:124, %eax +// CHECK: jmpq *8(%rax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%rax) + +// CHECK: btq $61, -216(%rbp) +// CHECK: encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d] + btq $61, -216(%rbp) diff --git a/test/MC/AsmParser/X86/x86_instruction_errors.s b/test/MC/AsmParser/X86/x86_instruction_errors.s new file mode 100644 index 0000000000000..183306be2c116 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_instruction_errors.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err +// RUN: FileCheck < %t.err %s + +// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq') +cmp $0, 0(%eax) diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s index 4bc8a4bb3a848..a82d2a1c0d413 100644 --- a/test/MC/AsmParser/X86/x86_instructions.s +++ b/test/MC/AsmParser/X86/x86_instructions.s @@ -1,4 +1,6 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err +// RUN: FileCheck < %t %s +// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s // CHECK: subb %al, %al subb %al, %al @@ -56,7 +58,7 @@ subl %eax, %ebx // FIXME: Check that this matches the correct instruction. 
-// CHECK: call *%rax +// CHECK: callq *%rax call *%rax // FIXME: Check that this matches the correct instruction. @@ -151,3 +153,23 @@ fadd %st(7) // CHECK: int3 INT3 + +// Allow scale factor without index register. +// CHECK: movaps %xmm3, (%esi) +// CHECK-STDERR: warning: scale factor without index register is ignored +movaps %xmm3, (%esi, 2) + +// CHECK: imull $12, %eax, %eax +imul $12, %eax + +// CHECK: imull %ecx, %eax +imull %ecx, %eax + +// PR8114 +// CHECK: outb %al, %dx +// CHECK: outw %ax, %dx +// CHECK: outl %eax, %dx + +out %al, (%dx) +out %ax, (%dx) +outl %eax, (%dx) diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s index bf958d8478ca3..ddadf7931895f 100644 --- a/test/MC/AsmParser/X86/x86_operands.s +++ b/test/MC/AsmParser/X86/x86_operands.s @@ -1,5 +1,3 @@ -// FIXME: Actually test that we get the expected results. - // RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s # Immediates diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp index 64cb75b20ff11..a6d81da5b7168 100644 --- a/test/MC/AsmParser/dg.exp +++ b/test/MC/AsmParser/dg.exp @@ -1,4 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] - +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s index 3eb8e96f2f887..1fd1f6e44a32c 100644 --- a/test/MC/AsmParser/directive_abort.s +++ b/test/MC/AsmParser/directive_abort.s @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t # RUN: FileCheck -input-file %t %s -# CHECK: .abort "please stop assembing" -TEST0: - .abort "please stop assembing" +# CHECK: error: .abort 'please stop assembing' +TEST0: + .abort please stop assembing diff --git a/test/MC/AsmParser/directive_elf_size.s b/test/MC/AsmParser/directive_elf_size.s new file mode 100644 index 0000000000000..af35ae07ed6ce --- /dev/null +++ 
b/test/MC/AsmParser/directive_elf_size.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +a: + ret +.Lt: +# CHECK: .size a, .Lt-a + .size a, .Lt-a + diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s index beac69a4aeb1d..c7617a337e02f 100644 --- a/test/MC/AsmParser/directive_values.s +++ b/test/MC/AsmParser/directive_values.s @@ -19,3 +19,20 @@ TEST2: # CHECK: .quad 9 TEST3: .quad 9 + + +# rdar://7997827 +TEST4: + .quad 0b0100 + .quad 4294967295 + .quad 4294967295+1 + .quad 4294967295LL+1 + .quad 0b10LL + 07ULL + 0x42AULL +# CHECK: TEST4 +# CHECK: .quad 4 +# CHECK: .quad 4294967295 +# CHECK: .quad 4294967296 +# CHECK: .quad 4294967296 +# CHECK: .quad 1075 + + diff --git a/test/MC/AsmParser/dollars-in-identifiers.s b/test/MC/AsmParser/dollars-in-identifiers.s new file mode 100644 index 0000000000000..e56959062ad9d --- /dev/null +++ b/test/MC/AsmParser/dollars-in-identifiers.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc -triple i386-unknown-unknown %s > %t +# RUN: FileCheck < %t %s + +// CHECK: .globl $foo +.globl $foo +// CHECK: .long ($foo) +.long ($foo) diff --git a/test/MC/AsmParser/macro-def-in-instantiation.s b/test/MC/AsmParser/macro-def-in-instantiation.s new file mode 100644 index 0000000000000..b6483b3b32b2c --- /dev/null +++ b/test/MC/AsmParser/macro-def-in-instantiation.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s + +.macro .make_macro +$0 $1 +$2 $3 +$4 +.endmacro + +.make_macro .macro,.mybyte,.byte,$0,.endmacro + +.data +// CHECK: .byte 10 +.mybyte 10 diff --git a/test/MC/AsmParser/macros-parsing.s b/test/MC/AsmParser/macros-parsing.s new file mode 100644 index 0000000000000..65f64546cc130 --- /dev/null +++ b/test/MC/AsmParser/macros-parsing.s @@ -0,0 +1,23 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.endmacro + +.macros_off +// CHECK-ERRORS: 9:1: warning: 
ignoring directive for now +.test0 +.macros_on + +.test0 + +// CHECK-ERRORS: macro '.test0' is already defined +.macro .test0 +.endmacro + +// CHECK-ERRORS: unexpected '.endmacro' in file +.endmacro + +// CHECK-ERRORS: no matching '.endmacro' in definition +.macro dummy + diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s new file mode 100644 index 0000000000000..214274d9aa845 --- /dev/null +++ b/test/MC/AsmParser/macros.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.macrobody0 +.endmacro +.macro .test1 +.test0 +.endmacro + +.test1 +// CHECK-ERRORS: <instantiation>:1:1: warning: ignoring directive for now +// CHECK-ERRORS-NEXT: macrobody0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: <instantiation>:1:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: 11:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test1 +// CHECK-ERRORS-NEXT: ^ + +.macro test2 +.byte $0 +.endmacro +test2 10 + +.macro test3 +.globl "$0 $1 $2 $$3 $n" +.endmacro + +// CHECK: .globl "1 23 $3 2" +test3 1,2 3 + +.macro test4 +.globl "$0 -- $1" +.endmacro + +// CHECK: .globl "ab)(,) -- (cd)" +test4 a b)(,),(cd) diff --git a/test/MC/COFF/basic-coff.ll b/test/MC/COFF/basic-coff.ll new file mode 100644 index 0000000000000..1e67db0e5c425 --- /dev/null +++ b/test/MC/COFF/basic-coff.ll @@ -0,0 +1,136 @@ +; This test checks that the COFF object emitter works for the most basic +; programs. 
+ +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: coff-dump.py %abs_tmp | FileCheck %s +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +@.str = private constant [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +; CHECK: { +; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C) +; CHECK: NumberOfSections = 2 +; CHECK: TimeDateStamp = {{[0-9]+}} +; CHECK: PointerToSymbolTable = 0x99 +; CHECK: NumberOfSymbols = 7 +; CHECK: SizeOfOptionalHeader = 0 +; CHECK: Characteristics = 0x0 +; CHECK: Sections = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 21 +; CHECK: PointerToRawData = 0x64 +; CHECK: PointerToRelocations = 0x79 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: NumberOfRelocations = 2 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0x60500020 +; CHECK: IMAGE_SCN_CNT_CODE +; CHECK: IMAGE_SCN_ALIGN_16BYTES +; CHECK: IMAGE_SCN_MEM_EXECUTE +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: SectionData = +; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 31 |.....$.........1| +; CHECK: C0 83 C4 04 C3 |.....| +; CHECK: Relocations = [ +; CHECK: 0 = { +; CHECK: VirtualAddress = 0x6 +; CHECK: SymbolTableIndex = 5 +; CHECK: Type = IMAGE_REL_I386_DIR32 (6) +; CHECK: SymbolName = _main +; CHECK: } +; CHECK: 1 = { +; CHECK: VirtualAddress = 0xB +; CHECK: SymbolTableIndex = 6 +; CHECK: Type = IMAGE_REL_I386_REL32 (20) +; CHECK: SymbolName = L_.str +; CHECK: } +; CHECK: ] +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 12 +; CHECK: PointerToRawData = 0x8D +; CHECK: PointerToRelocations = 0x0 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: 
NumberOfRelocations = 0 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0xC0100040 +; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA +; CHECK: IMAGE_SCN_ALIGN_1BYTES +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: IMAGE_SCN_MEM_WRITE +; CHECK: SectionData = +; CHECK: 48 65 6C 6C 6F 20 57 6F - 72 6C 64 00 |Hello World.| +; CHECK: Relocations = None +; CHECK: } +; CHECK: ] +; CHECK: Symbols = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 2 = { +; CHECK: Name = _main +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 3 = { +; CHECK: Name = L_.str +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 4 = { +; CHECK: Name = _printf +; CHECK: Value = 0 +; CHECK: SectionNumber = 0 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = 
IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: ] +; CHECK: } diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp new file mode 100644 index 0000000000000..7b7bd4e73807c --- /dev/null +++ b/test/MC/COFF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/MC/COFF/switch-relocations.ll b/test/MC/COFF/switch-relocations.ll new file mode 100644 index 0000000000000..300c10732ec6d --- /dev/null +++ b/test/MC/COFF/switch-relocations.ll @@ -0,0 +1,34 @@ +; The purpose of this test is to see if the COFF object writer can properly +; relax the fixups that are created for jump tables on x86-64. See PR7960. + +; This test case was reduced from Lua/lapi.c. + +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +define void @lua_gc(i32 %what) nounwind { +entry: + switch i32 %what, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb14 + i32 4, label %sw.bb18 + i32 6, label %sw.bb57 + ] + +sw.bb: ; preds = %entry, %entry, %entry + ret void + +sw.bb14: ; preds = %entry + ret void + +sw.bb18: ; preds = %entry + ret void + +sw.bb57: ; preds = %entry + ret void + +sw.epilog: ; preds = %entry + ret void +} diff --git a/test/MC/COFF/symbol-fragment-offset.ll b/test/MC/COFF/symbol-fragment-offset.ll new file mode 100644 index 0000000000000..af7ace19d59f0 --- /dev/null +++ b/test/MC/COFF/symbol-fragment-offset.ll @@ -0,0 +1,182 @@ +; The purpose of this test is to see if the COFF object writer is emitting the
+; proper relocations for multiple pieces of data in a single data fragment.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: coff-dump.py %abs_tmp | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+@.str = private constant [7 x i8] c"Hello \00" ; <[7 x i8]*> [#uses=1]
+@str = internal constant [7 x i8] c"World!\00" ; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+; CHECK: {
+; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C)
+; CHECK: NumberOfSections = 2
+; CHECK: TimeDateStamp = {{[0-9]+}}
+; CHECK: PointerToSymbolTable = 0xBB
+; CHECK: NumberOfSymbols = 9
+; CHECK: SizeOfOptionalHeader = 0
+; CHECK: Characteristics = 0x0
+; CHECK: Sections = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 33
+; CHECK: PointerToRawData = 0x64
+; CHECK: PointerToRelocations = 0x85
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 4
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0x60500020
+; CHECK: IMAGE_SCN_CNT_CODE
+; CHECK: IMAGE_SCN_ALIGN_16BYTES
+; CHECK: IMAGE_SCN_MEM_EXECUTE
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: SectionData =
+; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+; CHECK: 04 24 00 00 00 00 E8 00 - 00 00 00 31 C0 83 C4 04 |.$.........1....|
+; CHECK: C3 |.|
+
+; CHECK: Relocations = [
+; CHECK: 0 = {
+; CHECK: VirtualAddress = 0x6
+; CHECK: SymbolTableIndex = 5
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _main
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: VirtualAddress = 0xB
+; CHECK: SymbolTableIndex = 6
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = L_.str
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: VirtualAddress = 0x12
+; CHECK: SymbolTableIndex = 7
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _printf
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: VirtualAddress = 0x17
+; CHECK: SymbolTableIndex = 8
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = _str
+; CHECK: }
+; CHECK: ]
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 14
+; CHECK: PointerToRawData = 0xAD
+; CHECK: PointerToRelocations = 0x0
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 0
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0xC0100040
+; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: IMAGE_SCN_MEM_WRITE
+; CHECK: SectionData =
+; CHECK: 48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 |Hello .World!.|
+
+; CHECK: Relocations = None
+; CHECK: }
+; CHECK: ]
+; CHECK: Symbols = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 21 00 00 00 04 00 00 00 - 00 00 00 00 01 00 00 00 |!...............|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 0E 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: Name = _main
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: Name = L_.str
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 4 = {
+; CHECK: Name = _printf
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 5 = {
+; CHECK: Name = _str
+; CHECK: Value = 7
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 6 = {
+; CHECK: Name = _puts
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: ]
+; CHECK: }
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt index a1e229caebf86..0b4c2978fe3da 100644 --- a/test/MC/Disassembler/arm-tests.txt +++ b/test/MC/Disassembler/arm-tests.txt @@ -12,9 +12,21 @@ # CHECK: cmn r0, #1 0x01 0x00 0x70 0xe3 +# CHECK: dmb +0x5f 0xf0 0x7f 0xf5 + # CHECK: dmb nshst 0x56 0xf0 0x7f 0xf5 +# CHECK: dsb +0x4f 0xf0 0x7f 0xf5 + +# CHECK: dsb st +0x4e 0xf0 0x7f 0xf5 + +# CHECK: isb +0x6f 0xf0 0x7f 0xf5 + # CHECK: ldclvc p5, cr15, [r8], #-0 0x00 0xf5 0x78 0x7c @@ -42,9 +54,17 @@ # CHECK: mvnpls r7, #245, 2 0xf5 0x71 0xf0 0x53 +# CHECK-NOT: orr r7, r8, r7, rrx #0 +# CHECK: orr r7, r8, r7, rrx +0x67 0x70 0x88 0xe1 + # CHECK: pkhbt r8, r9, r10, lsl #4 0x1a 0x82 0x89 0xe6 +# CHECK-NOT: pkhbtls pc, r11, r11, lsl #0 +# CHECK: pkhbtls pc, r11, r11 +0x1b 0xf0 0x8b 0x96 + # CHECK: pop {r0, r2, r4, r6, r8, r10} 0x55 0x05 0xbd 0xe8 @@ -57,6 +77,14 @@ # CHECK: rfedb r0! 0x00 0x0a 0x30 0xf9 +# CHECK-NOT: rsbeq r0, r2, r0, lsl #0 +# CHECK: rsbeq r0, r2, r0 +0x00 0x00 0x62 0x00 + +# CHECK-NOT: rsceqs r0, r0, r1, lsl #0 +# CHECK: rsceqs r0, r0, r1 +0x01 0x00 0xf0 0x00 + # CHECK: sbcs r0, pc, #1 0x01 0x00 0xdf 0xe2 @@ -66,6 +94,10 @@ # CHECK: ssat r8, #1, r10, lsl #8 0x1a 0x84 0xa0 0xe6 +# CHECK-NOT: ssatmi r0, #17, r12, lsl #0 +# CHECK: ssatmi r0, #17, r12 +0x1c 0x00 0xb0 0x46 + # CHECK: stmdb r10!, {r4, r5, r6, r7, lr} 0xf0 0x40 0x2a 0xe9 @@ -75,3 +107,5 @@ # CHECK: ubfx r0, r0, #16, #1 0x50 0x08 0xe0 0xe7 +# CHECK: usat r8, #0, r10, asr #32 +0x5a 0x80 0xe0 0xe6 diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt index 51b31e7c1a6e5..826ff2272efa8 100644 --- a/test/MC/Disassembler/neon-tests.txt +++ b/test/MC/Disassembler/neon-tests.txt @@ -25,6 +25,9 @@ # CHECK: vmov.i64 q6, #0xFF00FF00FF 0x75 0xce 0x81 0xf2 +# CHECK: vmvn.i32 d0, #0x0 +0x30 0x00 0x80 0xf2 + # CHECK: vmul.f32 d0, d0, d6 0x16 0x0d 0x00 0xf3 diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt 
index 14e91295276b8..06d12fed87fb7 100644 --- a/test/MC/Disassembler/thumb-tests.txt +++ b/test/MC/Disassembler/thumb-tests.txt @@ -42,6 +42,10 @@ # CHECK: pkhtb r2, r4, r6, asr #16 0xc4 0xea 0x26 0x42 +# CHECK-NOT: pkhbt r2, r4, r6, lsl #0 +# CHECK: pkhbt r2, r4, r6 +0xc4 0xea 0x06 0x02 + # CHECK: pop {r2, r4, r6, r8, r10, r12} 0xbd 0xe8 0x54 0x15 @@ -51,6 +55,14 @@ # CHECK: rsbs r0, r0, #0 0x40 0x42 +# CHECK-NOT: rsb r0, r2, r0, lsl #0 +# CHECK: rsb r0, r2, r0 +0xc2 0xeb 0x00 0x00 + +# CHECK-NOT: ssat r0, #17, r12, lsl #0 +# CHECK: ssat r0, #17, r12 +0x0c 0xf3 0x10 0x00 + # CHECK: strd r0, [r7, #64] 0xc7 0xe9 0x10 0x01 diff --git a/test/MC/ELF/bss.ll b/test/MC/ELF/bss.ll new file mode 100644 index 0000000000000..5112d2c9b0a5d --- /dev/null +++ b/test/MC/ELF/bss.ll @@ -0,0 +1,8 @@ +; RUN: llc -filetype=obj %s -o %t +; FIXME: Add ELF dumping tool to check results. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@g0 = global i8* null, align 4 ; <i8**> [#uses=0] + diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp new file mode 100644 index 0000000000000..7b7bd4e73807c --- /dev/null +++ b/test/MC/ELF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/Makefile b/test/Makefile index f6830e638393e..7ca46beccc3f2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,11 +10,11 @@ LEVEL = .. DIRS = -# -# Make Dejagnu the default for testing -# all:: check-local +# 'lit' is the default test runner. 
+check-local:: check-local-lit + # Include other test rules include Makefile.tests @@ -84,18 +84,18 @@ else # !SunOS ifeq ($(HOST_OS),AuroraUX) ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; else # !AuroraUX -# Fedora 13 x86-64 python fails with -v 51200 -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 768000 ; +# Fedora 13 x86-64 python fails with -v 76800 +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ; endif # AuroraUX endif # SunOS ifneq ($(RUNTEST),) -check-local:: site.exp +check-local-dg:: site.exp ( $(ULIMIT) \ PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \ $(RUNTEST) $(RUNTESTFLAGS) ) else -check-local:: site.exp +check-local-dg:: site.exp @echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM." endif @@ -107,26 +107,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs ( $(ULIMIT) \ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) ) -ifdef TESTONE -CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE)) -CLEANED_TESTONE := $(patsubst test/%,%,$(CLEANED_TESTONE)) -SUBDIR := $(shell dirname $(CLEANED_TESTONE)) -TESTPATH := $(LLVM_SRC_ROOT)/test/$(CLEANED_TESTONE) -check-one: site.exp $(TCLSH) - $(Verb)( echo "source $(LLVM_OBJ_ROOT)/test/site.exp" ; \ - echo "set subdir $(SUBDIR)" ; \ - echo "proc pass { msg } { puts \"PASS: \$$msg\" } "; \ - echo "proc fail { msg } { puts \"FAIL: \$$msg\" }" ; \ - echo "proc xfail { msg } { puts \"XFAIL: \$$msg\" }" ; \ - echo "proc xpass { msg } { puts \"XPASS: \$$msg\" }" ; \ - echo "proc verbose args { }" ; \ - echo "source $(LLVM_SRC_ROOT)/test/lib/llvm.exp" ; \ - echo "RunLLVMTests $(TESTPATH)" ) | \ - ( $(ULIMIT) \ - PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \ - $(TCLSH) ) -endif - clean:: $(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print` @@ -166,7 +146,7 @@ site.exp: FORCE @echo 'set gccpath "$(CC)"' >>site.tmp @echo 
'set gxxpath "$(CXX)"' >>site.tmp @echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp - @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >> site.tmp + @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp @echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp @echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @@ -203,6 +183,3 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \ $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@ -# Daniel hates Chris. -chris-lit: - make check-lit LIT_ARGS='-j16 -s' diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll new file mode 100644 index 0000000000000..60fab3df0d9f9 --- /dev/null +++ b/test/Other/close-stderr.ll @@ -0,0 +1,9 @@ +; RUN: sh -c "\ +; RUN: opt --reject-this-option 2>&-; echo \$?; \ +; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \ +; RUN: " | FileCheck %s +; CHECK: {{^1$}} +; CHECK: {{^0$}} + +; Test that the error handling when writing to stderr fails exits the +; program cleanly rather than aborting. 
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll index ecef9c48492e8..926bdbc1b464e 100644 --- a/test/Other/constant-fold-gep.ll +++ b/test/Other/constant-fold-gep.ll @@ -71,8 +71,6 @@ ; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; PLAIN: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; PLAIN: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) @@ -82,8 +80,6 @@ ; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; OPT: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; TO: @a = constant i64 18480 ; TO: @b = constant i64 8 ; TO: @c = constant i64 16 @@ -93,8 +89,6 @@ ; TO: @g = constant i64 8 ; TO: @h = constant i64 8 ; TO: @i = constant i64 8 -; TO: @j = constant i64 8 -; TO: @k = constant i64 8 @a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) @b = constant i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64) @@ -105,8 +99,6 @@ @g = constant i64 ptrtoint ({double, double}* getelementptr 
({i1, {double, double}}* null, i64 0, i32 1) to i64) @h = constant i64 ptrtoint (double** getelementptr (double** null, i64 1) to i64) @i = constant i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) -@j = constant i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) -@k = constant i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) ; The target-dependent folder should cast GEP indices to integer-sized pointers. @@ -275,14 +267,6 @@ define i1* @hoo1() nounwind { ; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; PLAIN: ret i64 %t ; PLAIN: } -; PLAIN: define i64 @fj() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } -; PLAIN: define i64 @fk() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } ; OPT: define i64 @fa() nounwind { ; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: } @@ -310,12 +294,6 @@ define i1* @hoo1() nounwind { ; OPT: define i64 @fi() nounwind { ; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) ; OPT: } -; OPT: define i64 @fj() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: } -; OPT: define i64 @fk() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) -; OPT: } ; TO: define i64 @fa() nounwind { ; TO: ret i64 18480 ; TO: } @@ -343,12 +321,6 @@ define i1* @hoo1() nounwind { ; TO: define i64 @fi() nounwind { ; TO: ret i64 8 ; TO: } -; TO: define i64 @fj() nounwind { -; TO: ret i64 8 -; TO: } -; TO: define i64 @fk() nounwind { -; TO: ret i64 8 -; TO: } ; SCEV: Classifying expressions for: @fa ; SCEV: %t = bitcast i64 mul (i64 
ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64 ; SCEV: --> (2310 * sizeof(double)) @@ -376,12 +348,6 @@ define i1* @hoo1() nounwind { ; SCEV: Classifying expressions for: @fi ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; SCEV: --> alignof(i1*) -; SCEV: Classifying expressions for: @fj -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) -; SCEV: Classifying expressions for: @fk -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; SCEV: --> sizeof(double) define i64 @fa() nounwind { %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 @@ -419,14 +385,6 @@ define i64 @fi() nounwind { %t = bitcast i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) to i64 ret i64 %t } -define i64 @fj() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fk() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) to i64 - ret i64 %t -} ; PLAIN: define i64* @fM() nounwind { ; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/inline-asm-newline-terminator.ll index f6cc5c1fb4216..af93cc0dd2aa9 100644 --- a/test/Other/inline-asm-newline-terminator.ll +++ b/test/Other/inline-asm-newline-terminator.ll @@ -1,5 +1,4 @@ ; RUN: llc -filetype=obj -o - < %s -; XFAIL: vg_leak ; ModuleID = 't.c' target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/Other/lint.ll b/test/Other/lint.ll index dee3d11d2fb5e..fcef7ee2d5713 100644 --- a/test/Other/lint.ll +++ b/test/Other/lint.ll @@ -161,5 
+161,7 @@ declare i32 @nonstruct_callee() nounwind define void @struct_caller() nounwind { entry: call %struct bitcast (i32 ()* @foo to %struct ()*)() - ret void + + ; CHECK: Undefined behavior: indirectbr with no destinations + indirectbr i8* null, [] } diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py new file mode 100755 index 0000000000000..0af3d368d5d0a --- /dev/null +++ b/test/Scripts/coff-dump.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python +#===-- coff-dump.py - COFF object file dump utility-------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +# +# COFF File Definition +# + +def string_table_entry (offset): + return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s')) + +def secname(value): + if value[0] == '/': + return string_table_entry (value [1:].rstrip('\0')) + else: + return '%s' + +def symname(value): + parts = struct.unpack("<2L", value) + if parts [0] == 0: + return string_table_entry (parts [1]) + else: + return '%s' + +file = ('struct', [ + ('MachineType', ('enum', '<H', '0x%X', { + 0x0: 'IMAGE_FILE_MACHINE_UNKNOWN', + 0x1d3: 'IMAGE_FILE_MACHINE_AM33', + 0x8664: 'IMAGE_FILE_MACHINE_AMD64', + 0x1c0: 'IMAGE_FILE_MACHINE_ARM', + 0xebc: 'IMAGE_FILE_MACHINE_EBC', + 0x14c: 'IMAGE_FILE_MACHINE_I386', + 0x200: 'IMAGE_FILE_MACHINE_IA64', + 0x904: 'IMAGE_FILE_MACHINE_M32R', + 0x266: 'IMAGE_FILE_MACHINE_MIPS16', + 0x366: 'IMAGE_FILE_MACHINE_MIPSFPU', + 0x466: 'IMAGE_FILE_MACHINE_MIPSFPU16', + 0x1f0: 'IMAGE_FILE_MACHINE_POWERPC', + 0x1f1: 'IMAGE_FILE_MACHINE_POWERPCFP', + 0x166: 'IMAGE_FILE_MACHINE_R4000', + 0x1a2: 'IMAGE_FILE_MACHINE_SH3', + 0x1a3: 'IMAGE_FILE_MACHINE_SH3DSP', + 0x1a6: 'IMAGE_FILE_MACHINE_SH4', + 0x1a8: 'IMAGE_FILE_MACHINE_SH5', + 0x1c2: 'IMAGE_FILE_MACHINE_THUMB', + 
0x169: 'IMAGE_FILE_MACHINE_WCEMIPSV2', + })), + ('NumberOfSections', ('scalar', '<H', '%d')), + ('TimeDateStamp', ('scalar', '<L', '%d')), + ('PointerToSymbolTable', ('scalar', '<L', '0x%0X')), + ('NumberOfSymbols', ('scalar', '<L', '%d')), + ('SizeOfOptionalHeader', ('scalar', '<H', '%d')), + ('Characteristics', ('flags', '<H', '0x%x', [ + (0x0001, 'IMAGE_FILE_RELOCS_STRIPPED', ), + (0x0002, 'IMAGE_FILE_EXECUTABLE_IMAGE', ), + (0x0004, 'IMAGE_FILE_LINE_NUMS_STRIPPED', ), + (0x0008, 'IMAGE_FILE_LOCAL_SYMS_STRIPPED', ), + (0x0010, 'IMAGE_FILE_AGGRESSIVE_WS_TRIM', ), + (0x0020, 'IMAGE_FILE_LARGE_ADDRESS_AWARE', ), + (0x0080, 'IMAGE_FILE_BYTES_REVERSED_LO', ), + (0x0100, 'IMAGE_FILE_32BIT_MACHINE', ), + (0x0200, 'IMAGE_FILE_DEBUG_STRIPPED', ), + (0x0400, 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ), + (0x0800, 'IMAGE_FILE_NET_RUN_FROM_SWAP', ), + (0x1000, 'IMAGE_FILE_SYSTEM', ), + (0x2000, 'IMAGE_FILE_DLL', ), + (0x4000, 'IMAGE_FILE_UP_SYSTEM_ONLY', ), + (0x8000, 'IMAGE_FILE_BYTES_REVERSED_HI', ), + ])), + ('Sections', ('array', 'NumberOfSections', ('struct', [ + ('Name', ('scalar', '<8s', secname)), + ('VirtualSize', ('scalar', '<L', '%d' )), + ('VirtualAddress', ('scalar', '<L', '%d' )), + ('SizeOfRawData', ('scalar', '<L', '%d' )), + ('PointerToRawData', ('scalar', '<L', '0x%X' )), + ('PointerToRelocations', ('scalar', '<L', '0x%X' )), + ('PointerToLineNumbers', ('scalar', '<L', '0x%X' )), + ('NumberOfRelocations', ('scalar', '<H', '%d' )), + ('NumberOfLineNumbers', ('scalar', '<H', '%d' )), + ('Charateristics', ('flags', '<L', '0x%X', [ + (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'), + (0x00000020, 'IMAGE_SCN_CNT_CODE'), + (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'), + (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'), + (0x00000100, 'IMAGE_SCN_LNK_OTHER'), + (0x00000200, 'IMAGE_SCN_LNK_INFO'), + (0x00000800, 'IMAGE_SCN_LNK_REMOVE'), + (0x00001000, 'IMAGE_SCN_LNK_COMDAT'), + (0x00008000, 'IMAGE_SCN_GPREL'), + (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'), + (0x00020000, 
'IMAGE_SCN_MEM_16BIT'), + (0x00040000, 'IMAGE_SCN_MEM_LOCKED'), + (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'), + (0x00F00000, 'IMAGE_SCN_ALIGN', { + 0x00100000: 'IMAGE_SCN_ALIGN_1BYTES', + 0x00200000: 'IMAGE_SCN_ALIGN_2BYTES', + 0x00300000: 'IMAGE_SCN_ALIGN_4BYTES', + 0x00400000: 'IMAGE_SCN_ALIGN_8BYTES', + 0x00500000: 'IMAGE_SCN_ALIGN_16BYTES', + 0x00600000: 'IMAGE_SCN_ALIGN_32BYTES', + 0x00700000: 'IMAGE_SCN_ALIGN_64BYTES', + 0x00800000: 'IMAGE_SCN_ALIGN_128BYTES', + 0x00900000: 'IMAGE_SCN_ALIGN_256BYTES', + 0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES', + 0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES', + 0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES', + 0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES', + 0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES', + }), + (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'), + (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'), + (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'), + (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'), + (0x10000000, 'IMAGE_SCN_MEM_SHARED'), + (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'), + (0x40000000, 'IMAGE_SCN_MEM_READ'), + (0x80000000, 'IMAGE_SCN_MEM_WRITE'), + ])), + ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))), + ('Relocations', ('ptr', 'PointerToRelocations', ('array', 'NumberOfRelocations', ('struct', [ + ('VirtualAddress', ('scalar', '<L', '0x%X')), + ('SymbolTableIndex', ('scalar', '<L', '%d' )), + ('Type', ('enum', '<H', '%d', ('MachineType', { + 0x14c: { + 0x0000: 'IMAGE_REL_I386_ABSOLUTE', + 0x0001: 'IMAGE_REL_I386_DIR16', + 0x0002: 'IMAGE_REL_I386_REL16', + 0x0006: 'IMAGE_REL_I386_DIR32', + 0x0007: 'IMAGE_REL_I386_DIR32NB', + 0x0009: 'IMAGE_REL_I386_SEG12', + 0x000A: 'IMAGE_REL_I386_SECTION', + 0x000B: 'IMAGE_REL_I386_SECREL', + 0x000C: 'IMAGE_REL_I386_TOKEN', + 0x000D: 'IMAGE_REL_I386_SECREL7', + 0x0014: 'IMAGE_REL_I386_REL32', + }, + 0x8664: { + 0x0000: 'IMAGE_REL_AMD64_ABSOLUTE', + 0x0001: 'IMAGE_REL_AMD64_ADDR64', + 0x0002: 'IMAGE_REL_AMD64_ADDR32', + 0x0003: 'IMAGE_REL_AMD64_ADDR32NB', + 0x0004: 'IMAGE_REL_AMD64_REL32', + 0x0005: 
'IMAGE_REL_AMD64_REL32_1', + 0x0006: 'IMAGE_REL_AMD64_REL32_2', + 0x0007: 'IMAGE_REL_AMD64_REL32_3', + 0x0008: 'IMAGE_REL_AMD64_REL32_4', + 0x0009: 'IMAGE_REL_AMD64_REL32_5', + 0x000A: 'IMAGE_REL_AMD64_SECTION', + 0x000B: 'IMAGE_REL_AMD64_SECREL', + 0x000C: 'IMAGE_REL_AMD64_SECREL7', + 0x000D: 'IMAGE_REL_AMD64_TOKEN', + 0x000E: 'IMAGE_REL_AMD64_SREL32', + 0x000F: 'IMAGE_REL_AMD64_PAIR', + 0x0010: 'IMAGE_REL_AMD64_SSPAN32', + }, + }))), + ('SymbolName', ('ptr', '+ PointerToSymbolTable * - SymbolTableIndex 1 18', ('scalar', '<8s', symname))) + ])))), + ]))), + ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '* NumberOfSymbols 18', ('struct', [ + ('Name', ('scalar', '<8s', symname)), + ('Value', ('scalar', '<L', '%d' )), + ('SectionNumber', ('scalar', '<H', '%d' )), + ('SimpleType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_TYPE_NULL', + 1: 'IMAGE_SYM_TYPE_VOID', + 2: 'IMAGE_SYM_TYPE_CHAR', + 3: 'IMAGE_SYM_TYPE_SHORT', + 4: 'IMAGE_SYM_TYPE_INT', + 5: 'IMAGE_SYM_TYPE_LONG', + 6: 'IMAGE_SYM_TYPE_FLOAT', + 7: 'IMAGE_SYM_TYPE_DOUBLE', + 8: 'IMAGE_SYM_TYPE_STRUCT', + 9: 'IMAGE_SYM_TYPE_UNION', + 10: 'IMAGE_SYM_TYPE_ENUM', + 11: 'IMAGE_SYM_TYPE_MOE', + 12: 'IMAGE_SYM_TYPE_BYTE', + 13: 'IMAGE_SYM_TYPE_WORD', + 14: 'IMAGE_SYM_TYPE_UINT', + 15: 'IMAGE_SYM_TYPE_DWORD', + })), + ('ComplexType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_DTYPE_NULL', + 1: 'IMAGE_SYM_DTYPE_POINTER', + 2: 'IMAGE_SYM_DTYPE_FUNCTION', + 3: 'IMAGE_SYM_DTYPE_ARRAY', + })), + ('StorageClass', ('enum', '<B', '%d', { + -1: 'IMAGE_SYM_CLASS_END_OF_FUNCTION', + 0: 'IMAGE_SYM_CLASS_NULL', + 1: 'IMAGE_SYM_CLASS_AUTOMATIC', + 2: 'IMAGE_SYM_CLASS_EXTERNAL', + 3: 'IMAGE_SYM_CLASS_STATIC', + 4: 'IMAGE_SYM_CLASS_REGISTER', + 5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF', + 6: 'IMAGE_SYM_CLASS_LABEL', + 7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL', + 8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT', + 9: 'IMAGE_SYM_CLASS_ARGUMENT', + 10: 'IMAGE_SYM_CLASS_STRUCT_TAG', + 11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION', + 12: 'IMAGE_SYM_CLASS_UNION_TAG', 
+ 13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION', + 14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC', + 15: 'IMAGE_SYM_CLASS_ENUM_TAG', + 16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM', + 17: 'IMAGE_SYM_CLASS_REGISTER_PARAM', + 18: 'IMAGE_SYM_CLASS_BIT_FIELD', + 100: 'IMAGE_SYM_CLASS_BLOCK', + 101: 'IMAGE_SYM_CLASS_FUNCTION', + 102: 'IMAGE_SYM_CLASS_END_OF_STRUCT', + 103: 'IMAGE_SYM_CLASS_FILE', + 104: 'IMAGE_SYM_CLASS_SECTION', + 105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL', + 107: 'IMAGE_SYM_CLASS_CLR_TOKEN', + })), + ('NumberOfAuxSymbols', ('scalar', '<B', '%d' )), + ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')), + ])))), +]) + +# +# Definition Interpreter +# + +import sys, types, struct, re + +Input = None +Stack = [] +Fields = {} + +Indent = 0 +NewLine = True + +def indent(): + global Indent + Indent += 1 + +def dedent(): + global Indent + Indent -= 1 + +def write(input): + global NewLine + output = "" + + for char in input: + + if NewLine: + output += Indent * ' ' + NewLine = False + + output += char + + if char == '\n': + NewLine = True + + sys.stdout.write (output) + +def read(format): + return struct.unpack (format, Input.read(struct.calcsize(format))) + +def read_cstr (): + output = "" + while True: + char = Input.read (1) + if len (char) == 0: + raise RuntimeError ("EOF while reading cstr") + if char == '\0': + break + output += char + return output + +def push_pos(seek_to = None): + Stack [0:0] = [Input.tell ()] + if seek_to: + Input.seek (seek_to) + +def pop_pos(): + assert(len (Stack) > 0) + Input.seek (Stack [0]) + del Stack [0] + +def print_binary_data(size): + value = "" + while size > 0: + if size >= 16: + data = Input.read(16) + size -= 16 + else: + data = Input.read(size) + size = 0 + value += data + bytes = "" + text = "" + for index in xrange (16): + if index < len (data): + if index == 8: + bytes += "- " + ch = ord (data [index]) + bytes += "%02X " % ch + if ch >= 0x20 and ch <= 0x7F: + text += data [index] + else: + text += "." 
+ else: + if index == 8: + bytes += " " + bytes += " " + + write ("%s|%s|\n" % (bytes, text)) + return value + +idlit = re.compile ("[a-zA-Z][a-zA-Z0-9_-]*") +numlit = re.compile ("[0-9]+") + +def read_value(expr): + + input = iter (expr.split ()) + + def eval(): + + token = input.next () + + if expr == 'cstr': + return read_cstr () + if expr == 'true': + return True + if expr == 'false': + return False + + if len (token) > 1 and token [0] in ('=', '@', '<', '!', '>'): + val = read(expr) + assert (len (val) == 1) + return val [0] + + if token == '+': + return eval () + eval () + if token == '-': + return eval () - eval () + if token == '*': + return eval () * eval () + if token == '/': + return eval () / eval () + + if idlit.match (token): + return Fields [token] + if numlit.match (token): + return int (token) + + raise RuntimeError ("unexpected token %s" % repr(token)) + + value = eval () + + try: + input.next () + except StopIteration: + return value + raise RuntimeError("unexpected input at end of expression") + +def write_value(format,value): + format_type = type (format) + if format_type is types.StringType: + write (format%value) + elif format_type is types.FunctionType: + write_value (format (value), value) + elif format_type is types.TupleType: + Fields ['this'] = value + handle_element (format) + else: + raise RuntimeError("unexpected type: %s" % repr(format_type)) + +def handle_scalar(entry): + iformat = entry [1] + oformat = entry [2] + + value = read_value (iformat) + + write_value (oformat, value) + + return value + +def handle_enum(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + if type (definitions) is types.TupleType: + selector = read_value (definitions [0]) + definitions = definitions [1] [selector] + + if value in definitions: + description = definitions[value] + else: + description = "unknown" + + write ("%s (" % description) + write_value (oformat, value) + write (")") + + 
return value + +def handle_flags(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + write_value (oformat, value) + + indent () + for entry in definitions: + mask = entry [0] + name = entry [1] + if len (entry) == 3: + map = entry [2] + selection = value & mask + if selection in map: + write("\n%s" % map[selection]) + else: + write("\n%s <%d>" % (name, selection)) + elif len (entry) == 2: + if value & mask != 0: + write("\n%s" % name) + dedent () + + return value + +def handle_struct(entry): + global Fields + members = entry [1] + + newFields = {} + + write ("{\n"); + indent () + + for member in members: + name = member [0] + type = member [1] + + write("%s = "%name.ljust(24)) + + value = handle_element(type) + + write("\n") + + Fields [name] = value + newFields [name] = value + + dedent () + write ("}") + + return newFields + +def handle_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + + for index in xrange (value): + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + + dedent () + write ("]") + + return newItems + +def handle_byte_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + end_of_array = Input.tell () + value + + index = 0 + while Input.tell () < end_of_array: + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + index += 1 + + dedent () + write ("]") + + return newItems + +def handle_ptr(entry): + offset = entry[1] + element = entry [2] + + value = None + offset = read_value (offset) + + if offset != 0: + + push_pos (offset) + + value = handle_element (element) + + pop_pos () + + else: + write ("None") + + return value + +def handle_blob(entry): + length = entry [1] + + write ("\n") + indent () + + value = print_binary_data 
(read_value (length)) + + dedent () + + return value + +def handle_element(entry): + handlers = { + 'struct': handle_struct, + 'scalar': handle_scalar, + 'enum': handle_enum, + 'flags': handle_flags, + 'ptr': handle_ptr, + 'blob': handle_blob, + 'array': handle_array, + 'byte-array': handle_byte_array, + } + + if not entry [0] in handlers: + raise RuntimeError ("unexpected type '%s'" % str (entry[0])) + + return handlers [entry [0]] (entry) + +Input = open (sys.argv [1], "rb") +try: + handle_element (file) +finally: + Input.close () + Input = None diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat new file mode 100644 index 0000000000000..cc83eba1c4468 --- /dev/null +++ b/test/Scripts/coff-dump.py.bat @@ -0,0 +1,4 @@ +@echo off + +%PYTHON_EXECUTABLE% %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9 + diff --git a/test/TableGen/FieldAccess.td b/test/TableGen/FieldAccess.td new file mode 100644 index 0000000000000..ad652e79ea7ce --- /dev/null +++ b/test/TableGen/FieldAccess.td @@ -0,0 +1,14 @@ +// RUN: tblgen %s +class Bla<string t> +{ + string blu = t; +} + +class Bli<Bla t> +{ + Bla bla = t; +} + +def a : Bli<Bla<"">>; +def b : Bla<!cast<Bla>(a.bla).blu>; // works +def c : Bla<a.bla.blu>; // doesn't work: Cannot access field 'blu' of value 'a.bla' diff --git a/test/TableGen/ListManip.td b/test/TableGen/ListManip.td new file mode 100644 index 0000000000000..c221bb1335b62 --- /dev/null +++ b/test/TableGen/ListManip.td @@ -0,0 +1,10 @@ +// RUN: tblgen %s +class Bli<string _t> +{ + string t = _t; +} + +class Bla<list<Bli> _bli> +: Bli<!car(_bli).t> +{ +} diff --git a/test/TestRunner.sh b/test/TestRunner.sh index 4f04d81aac648..ab50856af11fc 100755 --- a/test/TestRunner.sh +++ b/test/TestRunner.sh @@ -1,36 +1,5 @@ #!/bin/sh -# -# TestRunner.sh - This script is used to run the deja-gnu tests exactly like -# deja-gnu does, by executing the Tcl script specified in the test case's -# RUN: lines. 
This is made possible by a simple make target supported by the -# test/Makefile. All this script does is invoke that make target. -# -# Usage: -# TestRunner.sh {script_names} -# -# This script is typically used by cd'ing to a test directory and then -# running TestRunner.sh with a list of test file names you want to run. -# -TESTPATH=`pwd` -SUBDIR="" -if test `dirname $1` = "." ; then - while test `basename $TESTPATH` != "test" -a ! -z "$TESTPATH" ; do - tmp=`basename $TESTPATH` - SUBDIR="$tmp/$SUBDIR" - TESTPATH=`dirname $TESTPATH` - done -fi +# Deprecated, use 'llvm-lit'. -for TESTFILE in "$@" ; do - if test `dirname $TESTFILE` = . ; then - if test -d "$TESTPATH" ; then - cd $TESTPATH - make check-one TESTONE="$SUBDIR$TESTFILE" - cd $PWD - else - echo "Can't find llvm/test directory in " `pwd` - fi - else - make check-one TESTONE=$TESTFILE - fi -done +echo "warning: '$0' is deprecated, use 'llvm-lit' instead." +exec llvm-lit "$@" diff --git a/test/Transforms/ABCD/basic.ll b/test/Transforms/ABCD/basic.ll deleted file mode 100644 index f2ce1b9aa738b..0000000000000 --- a/test/Transforms/ABCD/basic.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -abcd -S | FileCheck %s - -define void @test() { -; CHECK: @test -; CHECK-NOT: br i1 %tmp95 -; CHECK: ret void -entry: - br label %bb19 - -bb: - br label %bb1 - -bb1: - %tmp7 = icmp sgt i32 %tmp94, 1 - br i1 %tmp7, label %bb.i.i, label %return - -bb.i.i: - br label %return - -bb19: - %tmp94 = ashr i32 undef, 3 - %tmp95 = icmp sgt i32 %tmp94, 16 - br i1 %tmp95, label %bb, label %return - -return: - ret void -} diff --git a/test/Transforms/ConstProp/constant-expr.ll b/test/Transforms/ConstProp/constant-expr.ll index 996303293d180..556ed1f652afe 100644 --- a/test/Transforms/ConstProp/constant-expr.ll +++ b/test/Transforms/ConstProp/constant-expr.ll @@ -16,9 +16,9 @@ @E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y) @F = global i1 srem (i1 icmp 
ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @F = global i1 false ; <i1*> [#uses=0] +; CHECK: @F = global i1 false @G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @G = global i1 false ; <i1*> [#uses=0] +; CHECK: @G = global i1 false @H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*)) ; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y) diff --git a/test/Transforms/ConstantMerge/dont-merge.ll b/test/Transforms/ConstantMerge/dont-merge.ll index 877cf8dc67104..e5337dff27dfb 100644 --- a/test/Transforms/ConstantMerge/dont-merge.ll +++ b/test/Transforms/ConstantMerge/dont-merge.ll @@ -28,3 +28,17 @@ define void @test2(i32** %P1, i32 addrspace(30)** %P2) { store i32 addrspace(30)* @T2b, i32 addrspace(30)** %P2 ret void } + +; PR8144 - Don't merge globals marked attribute(used) +; CHECK: @T3A = +; CHECK: @T3B = + +@T3A = internal constant i32 0 +@T3B = internal constant i32 0 +@llvm.used = appending global [2 x i32*] [i32* @T3A, i32* @T3B], section +"llvm.metadata" + +define void @test3() { + call void asm sideeffect "T3A, T3B",""() ; invisible use of T3A and T3B + ret void +} diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll new file mode 100644 index 0000000000000..fef5b8579eb58 --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll @@ -0,0 +1,25 @@ +; RUN: opt -S < %s -correlated-propagation | FileCheck %s + +; CHECK: @test +define i16 @test(i32 %a, i1 %b) { +entry: + %c = icmp eq i32 %a, 0 + br i1 %c, label %left, label %right + +right: + %d = trunc i32 %a to i1 + br label %merge + +left: + br i1 %b, label %merge, label %other + +other: + ret i16 23 + +merge: + %f = phi i1 [%b, %left], [%d, %right] +; CHECK: select i1 %f, i16 1, i16 0 + %h = select i1 %f, i16 1, i16 0 +; CHECK: ret i16 %h + ret i16 %h +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll new file mode 100644 index 0000000000000..24666e901e9eb --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/basic.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s +; PR2581 + +; CHECK: @test1 +define i32 @test1(i1 %C) nounwind { + br i1 %C, label %exit, label %body + +body: ; preds = %0 +; CHECK-NOT: select + %A = select i1 %C, i32 10, i32 11 ; <i32> [#uses=1] +; CHECK: ret i32 11 + ret i32 %A + +exit: ; preds = %0 +; CHECK: ret i32 10 + ret i32 10 +} + +; PR4420 +declare i1 @ext() +; CHECK: @test2 +define i1 @test2() { +entry: + %cond = tail call i1 @ext() ; <i1> [#uses=2] + br i1 %cond, label %bb1, label %bb2 + +bb1: ; preds = %entry + %cond2 = tail call i1 @ext() ; <i1> [#uses=1] + br i1 %cond2, label %bb3, label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK-NOT: phi i1 + %cond_merge = phi i1 [ %cond, %entry ], [ false, %bb1 ] ; <i1> [#uses=1] +; CHECK: ret i1 false + ret i1 %cond_merge + +bb3: ; preds = %bb1 + %res = tail call i1 @ext() ; <i1> [#uses=1] +; CHECK: ret i1 %res + ret i1 %res +} + +; PR4855 +@gv = internal constant i8 7 +; CHECK: @test3 +define i8 @test3(i8* %a) nounwind { +entry: + %cond = icmp eq i8* %a, @gv + br i1 %cond, label %bb2, label %bb + +bb: ; preds = %entry + ret i8 0 + +bb2: ; preds = %entry +; CHECK-NOT: load i8* %a + %should_be_const = load i8* %a +; CHECK: ret i8 7 + ret i8 %should_be_const +} + +; PR1757 +; CHECK: @test4 +define i32 @test4(i32) { +EntryBlock: +; CHECK: icmp sgt i32 %0, 2 + %.demorgan = icmp sgt i32 %0, 2 + br i1 %.demorgan, label %GreaterThanTwo, label %LessThanOrEqualToTwo + +GreaterThanTwo: +; CHECK-NOT: icmp eq i32 %0, 2 + icmp eq i32 %0, 2 +; CHECK: br i1 false + br i1 %1, label %Impossible, label %NotTwoAndGreaterThanTwo + +NotTwoAndGreaterThanTwo: + ret i32 2 + +Impossible: + ret i32 1 + +LessThanOrEqualToTwo: + ret i32 0 +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp new file mode 100644 index 0000000000000..de42dad163fd9 --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] diff --git a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll index 641e920006b23..f079108b9bdaf 100644 --- a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll +++ b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -gvn | llvm-dis ; PR4256 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" %llvm.dbg.anchor.type = type { i32, i32 } %struct.cset = type { i8*, i8, i8, i32, i8* } %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* } diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll index 5e64f807f6ead..390e77a8cea8e 100644 --- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll +++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -globalopt -S > %t ; Check that the new global values still have their address space -; RUN: cat %t | grep global.*addrspace +; RUN: cat %t | grep addrspace.*global @struct = internal addrspace(1) global { i32, i32 } zeroinitializer @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll index 701472c059a83..bb1fc84f46f93 100644 --- a/test/Transforms/GlobalOpt/crash.ll +++ b/test/Transforms/GlobalOpt/crash.ll @@ -40,3 +40,18 @@ xx: } declare noalias i8* @malloc(i64) nounwind + + +; 
PR8063 +@permute_bitrev.bitrev = internal global i32* null, align 8 +define void @permute_bitrev() nounwind { +entry: + %tmp = load i32** @permute_bitrev.bitrev, align 8 + %conv = sext i32 0 to i64 + %mul = mul i64 %conv, 4 + %call = call i8* @malloc(i64 %mul) + %0 = bitcast i8* %call to i32* + store i32* %0, i32** @permute_bitrev.bitrev, align 8 + ret void +} + diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll index f4bab353cd073..bd174a8be3ff0 100644 --- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll @@ -21,10 +21,10 @@ define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly { entry: %0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1] %1 = load i32* %0 -; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) ; <i32> [#uses=1] +; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) %2 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1] %3 = load i8* %2 -; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) ; <i8> [#uses=1] +; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) %4 = zext i8 %3 to i32 %5 = add i32 %4, %1 ret i32 %5 diff --git a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll deleted file mode 100644 index c8f97e39bef6e..0000000000000 --- a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll +++ /dev/null @@ -1,25 +0,0 @@ -; The induction variable canonicalization pass shouldn't leave dead -; instructions laying around! 
-; -; RUN: opt < %s -indvars -S | \ -; RUN: not grep {#uses=0} - -define i32 @mul(i32 %x, i32 %y) { -entry: - br label %tailrecurse - -tailrecurse: ; preds = %endif, %entry - %accumulator.tr = phi i32 [ %x, %entry ], [ %tmp.9, %endif ] ; <i32> [#uses=2] - %y.tr = phi i32 [ %y, %entry ], [ %tmp.8, %endif ] ; <i32> [#uses=2] - %tmp.1 = icmp eq i32 %y.tr, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif - -endif: ; preds = %tailrecurse - %tmp.8 = add i32 %y.tr, -1 ; <i32> [#uses=1] - %tmp.9 = add i32 %accumulator.tr, %x ; <i32> [#uses=1] - br label %tailrecurse - -return: ; preds = %tailrecurse - ret i32 %accumulator.tr -} - diff --git a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll index d73eee812b307..d211e3b824b2f 100644 --- a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll +++ b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -indvars ; PR4258 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" define void @0(i32*, i32*, i32, i32) nounwind { br i1 false, label %bb.nph1.preheader, label %.outer._crit_edge diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll index ab438334c660e..516fd8084d9e9 100644 --- a/test/Transforms/IndVarSimplify/crash.ll +++ b/test/Transforms/IndVarSimplify/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -indvars %s -disable-output +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" declare i32 @putchar(i8) nounwind @@ -17,3 +18,38 @@ define void @t2(i1* %P) nounwind { ; <label>:6 ; preds = %1 ret void } + +; PR7562 +define void @fannkuch() nounwind { +entry: ; preds = %entry + br label %bb12 + +bb12: ; preds = 
%bb29, %entry + %i.1 = phi i32 [ undef, %entry ], [ %i.0, %bb29 ] ; <i32> [#uses=2] + %r.1 = phi i32 [ undef, %entry ], [ %r.0, %bb29 ] ; <i32> [#uses=2] + br i1 undef, label %bb13, label %bb24 + +bb13: ; preds = %bb12 + br label %bb24 + +bb24: ; preds = %bb30, %bb13, %bb12 + %i.2 = phi i32 [ %i.1, %bb13 ], [ %i.0, %bb30 ], [ %i.1, %bb12 ] ; <i32> [#uses=1] + %r.0 = phi i32 [ %r.1, %bb13 ], [ %2, %bb30 ], [ %r.1, %bb12 ] ; <i32> [#uses=3] + br label %bb28 + +bb27: ; preds = %bb28 + %0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb28 + +bb28: ; preds = %bb27, %bb26 + %i.0 = phi i32 [ %i.2, %bb24 ], [ %0, %bb27 ] ; <i32> [#uses=4] + %1 = icmp slt i32 %i.0, %r.0 ; <i1> [#uses=1] + br i1 %1, label %bb27, label %bb29 + +bb29: ; preds = %bb28 + br i1 undef, label %bb12, label %bb30 + +bb30: ; preds = %bb29 + %2 = add nsw i32 %r.0, 1 ; <i32> [#uses=1] + br label %bb24 +} diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll index 4ec4acadb4a57..269478a5ed038 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll @@ -5,7 +5,7 @@ ; exit is taken. Indvars should correctly compute the exit values. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-linux-gnu" +target triple = "x86_64-pc-linux-gnu" %struct..0anon = type <{ i8, [3 x i8] }> define i32 @main() nounwind { diff --git a/test/Transforms/IndVarSimplify/uglygep.ll b/test/Transforms/IndVarSimplify/uglygep.ll new file mode 100644 index 0000000000000..0014b683db4b1 --- /dev/null +++ b/test/Transforms/IndVarSimplify/uglygep.ll @@ -0,0 +1,40 @@ +; RUN: opt -indvars -S < %s | not grep uglygep +; rdar://8197217 + +; Indvars should be able to emit a clean GEP here, not an uglygep. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +@numf2s = external global i32 ; <i32*> [#uses=1] +@numf1s = external global i32 ; <i32*> [#uses=1] +@tds = external global double** ; <double***> [#uses=1] + +define void @init_td(i32 %tmp7) nounwind { +entry: + br label %bb4 + +bb4: ; preds = %bb3, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp9, %bb3 ] ; <i32> [#uses=3] + br label %bb + +bb: ; preds = %bb4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %j.0 = phi i32 [ 0, %bb ], [ %tmp6, %bb1 ] ; <i32> [#uses=3] + %tmp8 = icmp slt i32 %j.0, %tmp7 ; <i1> [#uses=1] + br i1 %tmp8, label %bb1, label %bb3 + +bb1: ; preds = %bb2 + %tmp = load double*** @tds, align 8 ; <double**> [#uses=1] + %tmp1 = sext i32 %i.0 to i64 ; <i64> [#uses=1] + %tmp2 = getelementptr inbounds double** %tmp, i64 %tmp1 ; <double**> [#uses=1] + %tmp3 = load double** %tmp2, align 1 ; <double*> [#uses=1] + %tmp6 = add nsw i32 %j.0, 1 ; <i32> [#uses=1] + br label %bb2 + +bb3: ; preds = %bb2 + %tmp9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb4 +} diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll index d8ad5a9864e2b..27916b9860307 100644 --- a/test/Transforms/InstCombine/align-addr.ll +++ b/test/Transforms/InstCombine/align-addr.ll @@ -1,10 +1,13 @@ -; RUN: opt < %s -instcombine -S | grep {align 16} | count 1 +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Instcombine should be able to prove vector alignment in the ; presence of a few mild address computation tricks. 
-define void @foo(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { +; CHECK: @test0( +; CHECK: align 16 + +define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { entry: %c = ptrtoint i8* %b to i64 %d = and i64 %c, -16 @@ -29,3 +32,29 @@ return: ret void } +; When we see a unaligned load from an insufficiently aligned global or +; alloca, increase the alignment of the load, turning it into an aligned load. + +; CHECK: @test1( +; CHECK: tmp = load +; CHECK: GLOBAL{{.*}}align 16 + +@GLOBAL = internal global [4 x i32] zeroinitializer + +define <16 x i8> @test1(<2 x i64> %x) { +entry: + %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 + ret <16 x i8> %tmp +} + +; When a load or store lacks an explicit alignment, add one. + +; CHECK: @test2( +; CHECK: load double* %p, align 8 +; CHECK: store double %n, double* %p, align 8 + +define double @test2(double* %p, double %n) nounwind { + %t = load double* %p + store double %n, double* %p + ret double %t +} diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll deleted file mode 100644 index 71512b3a14945..0000000000000 --- a/test/Transforms/InstCombine/align-inc.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16} -; RUN: opt < %s -instcombine -S | grep {tmp = load} -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -@GLOBAL = internal global [4 x i32] zeroinitializer - -define <16 x i8> @foo(<2 x i64> %x) { -entry: - %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 - ret <16 x i8> %tmp -} - diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll index f97fb45af1614..d774c0972defa 100644 --- a/test/Transforms/InstCombine/bit-checks.ll +++ b/test/Transforms/InstCombine/bit-checks.ll @@ -13,3 +13,14 @@ entry: %retval.0 = select i1 %or.cond, i32 2, i32 1 ; <i32> [#uses=1] ret 
i32 %retval.0 } + +define i32 @main2(i32 %argc, i8** nocapture %argv) nounwind readnone ssp { +entry: + %and = and i32 %argc, 1 ; <i32> [#uses=1] + %tobool = icmp eq i32 %and, 0 ; <i1> [#uses=1] + %and2 = and i32 %argc, 2 ; <i32> [#uses=1] + %tobool3 = icmp eq i32 %and2, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tobool, %tobool3 ; <i1> [#uses=1] + %storemerge = select i1 %or.cond, i32 0, i32 1 ; <i32> [#uses=1] + ret i32 %storemerge +}
\ No newline at end of file diff --git a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll deleted file mode 100644 index 4e9dfbb53b496..0000000000000 --- a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 0} -; PR4487 - -; Bitcasts between vectors and scalars are valid, despite being ill-advised. - -define i32 @test(i64 %a) { -bb20: - %t1 = bitcast i64 %a to <2 x i32> - %t2 = bitcast i64 %a to <2 x i32> - %t3 = xor <2 x i32> %t1, %t2 - %t4 = extractelement <2 x i32> %t3, i32 0 - ret i32 %t4 -} - diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll new file mode 100644 index 0000000000000..0718b8a3aee04 --- /dev/null +++ b/test/Transforms/InstCombine/bitcast.ll @@ -0,0 +1,105 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Bitcasts between vectors and scalars are valid. +; PR4487 +define i32 @test1(i64 %a) { + %t1 = bitcast i64 %a to <2 x i32> + %t2 = bitcast i64 %a to <2 x i32> + %t3 = xor <2 x i32> %t1, %t2 + %t4 = extractelement <2 x i32> %t3, i32 0 + ret i32 %t4 + +; CHECK: @test1 +; CHECK: ret i32 0 +} + +; Optimize bitcasts that are extracting low element of vector. This happens +; because of SRoA. 
+; rdar://7892780 +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + +; Optimize bitcasts that are extracting other elements of a vector. This +; happens because of SRoA. +; rdar://7892780 +define float @test3(<2 x float> %A, <2 x i64> %B) { + %tmp28 = bitcast <2 x float> %A to i64 + %tmp29 = lshr i64 %tmp28, 32 + %tmp23 = trunc i64 %tmp29 to i32 + %tmp24 = bitcast i32 %tmp23 to float + + %tmp = bitcast <2 x i64> %B to i128 + %tmp1 = lshr i128 %tmp, 64 + %tmp2 = trunc i128 %tmp1 to i32 + %tmp4 = bitcast i32 %tmp2 to float + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test3 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1 +; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float> +; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + + +define <2 x i32> @test4(i32 %A, i32 %B){ + %tmp38 = zext i32 %A to i64 + %tmp32 = zext i32 %B to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x i32> + ret <2 x i32> %tmp43 + ; CHECK: @test4 + ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0 + ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1 + ; CHECK-NEXT: ret <2 x i32> + +} + +; rdar://8360454 +define <2 x float> @test5(float %A, float %B) { + %tmp37 = 
bitcast float %A to i32 + %tmp38 = zext i32 %tmp37 to i64 + %tmp31 = bitcast float %B to i32 + %tmp32 = zext i32 %tmp31 to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x float> + ret <2 x float> %tmp43 + ; CHECK: @test5 + ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0 + ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1 + ; CHECK-NEXT: ret <2 x float> +} + +define <2 x float> @test6(float %A){ + %tmp23 = bitcast float %A to i32 ; <i32> [#uses=1] + %tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1] + %tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1] + %mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1] + %tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1] + ret <2 x float> %tmp35 +; CHECK: @test6 +; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1 +; CHECK: ret +} diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 08dcfa731a94f..d672d8c1535ec 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -10,16 +10,16 @@ declare i32 @llvm.ctlz.i32(i32) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i8 @llvm.ctlz.i8(i8) nounwind readnone -define i8 @test1(i8 %A, i8 %B) { +define i8 @uaddtest1(i8 %A, i8 %B) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B) %y = extractvalue %overflow.result %x, 0 ret i8 %y -; CHECK: @test1 +; CHECK: @uaddtest1 ; CHECK-NEXT: %y = add i8 %A, %B ; CHECK-NEXT: ret i8 %y } -define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) { %and.A = and i8 %A, 127 %and.B = and i8 %B, 127 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %and.A, i8 %and.B) @@ -27,7 +27,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; 
CHECK: @test2 +; CHECK: @uaddtest2 ; CHECK-NEXT: %and.A = and i8 %A, 127 ; CHECK-NEXT: %and.B = and i8 %B, 127 ; CHECK-NEXT: %1 = add nuw i8 %and.A, %and.B @@ -35,7 +35,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) { %or.A = or i8 %A, -128 %or.B = or i8 %B, -128 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %or.A, i8 %or.B) @@ -43,7 +43,7 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test3 +; CHECK: @uaddtest3 ; CHECK-NEXT: %or.A = or i8 %A, -128 ; CHECK-NEXT: %or.B = or i8 %B, -128 ; CHECK-NEXT: %1 = add i8 %or.A, %or.B @@ -51,34 +51,44 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test4(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest4(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 undef, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test4 +; CHECK: @uaddtest4 ; CHECK-NEXT: ret i8 undef } -define i8 @test5(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest5(i8 %A, i1* %overflowPtr) { + %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 0, i8 %A) + %y = extractvalue %overflow.result %x, 0 + %z = extractvalue %overflow.result %x, 1 + store i1 %z, i1* %overflowPtr + ret i8 %y +; CHECK: @uaddtest5 +; CHECK: ret i8 %A +} + +define i8 @umultest1(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test5 +; CHECK: @umultest1 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 0 } -define i8 @test6(i8 %A, i1* %overflowPtr) { +define i8 @umultest2(i8 %A, i1* 
%overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 1, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test6 +; CHECK: @umultest2 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 %A } diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll index fc321e968224c..c6c3f2ff6a687 100644 --- a/test/Transforms/InstCombine/phi.ll +++ b/test/Transforms/InstCombine/phi.ll @@ -402,3 +402,24 @@ if.else: ; preds = %entry store i32 %tmp5, i32* %res br label %if.end } + +; PR4413 +declare i32 @ext() +; CHECK: @test17 +define i32 @test17(i1 %a) { +entry: + br i1 %a, label %bb1, label %bb2 + +bb1: ; preds = %entry + %0 = tail call i32 @ext() ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %entry + %cond = phi i1 [ true, %bb1 ], [ false, %entry ] ; <i1> [#uses=1] +; CHECK-NOT: %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] + %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] ; <i32> [#uses=1] + %res = select i1 %cond, i32 %val, i32 0 ; <i32> [#uses=1] +; CHECK: ret i32 %cond + ret i32 %res +} + diff --git a/test/Transforms/InstCombine/shift-simplify.ll b/test/Transforms/InstCombine/shift-simplify.ll deleted file mode 100644 index e5cc705350f9c..0000000000000 --- a/test/Transforms/InstCombine/shift-simplify.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: egrep {shl|lshr|ashr} | count 3 - -define i32 @test0(i32 %A, i32 %B, i32 %C) { - %X = shl i32 %A, %C - %Y = shl i32 %B, %C - %Z = and i32 %X, %Y - ret i32 %Z -} - -define i32 @test1(i32 %A, i32 %B, i32 %C) { - %X = lshr i32 %A, %C - %Y = lshr i32 %B, %C - %Z = or i32 %X, %Y - ret i32 %Z -} - -define i32 @test2(i32 %A, i32 %B, i32 %C) { - %X = ashr i32 %A, %C - %Y = ashr i32 %B, %C - %Z = xor i32 %X, %Y - ret i32 %Z -} - -define i1 @test3(i32 %X) { - %tmp1 = shl i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test4(i32 %X) 
{ - %tmp1 = lshr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test5(i32 %X) { - %tmp1 = ashr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - diff --git a/test/Transforms/InstCombine/shift-trunc-shift.ll b/test/Transforms/InstCombine/shift-trunc-shift.ll deleted file mode 100644 index 7133d299a2bde..0000000000000 --- a/test/Transforms/InstCombine/shift-trunc-shift.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep lshr.*63 - -define i32 @t1(i64 %d18) { -entry: - %tmp916 = lshr i64 %d18, 32 ; <i64> [#uses=1] - %tmp917 = trunc i64 %tmp916 to i32 ; <i32> [#uses=1] - %tmp10 = lshr i32 %tmp917, 31 ; <i32> [#uses=1] - ret i32 %tmp10 -} - diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index feed37bd10ab5..871e9fe070e7a 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -130,8 +130,8 @@ define i8 @test13(i8 %A) { ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14(i32 %A) { ; CHECK: @test14 -; CHECK-NEXT: or i32 %A, 19744 -; CHECK-NEXT: and i32 +; CHECK-NEXT: %B = and i32 %A, -19760 +; CHECK-NEXT: or i32 %B, 19744 ; CHECK-NEXT: ret i32 %B = lshr i32 %A, 4 ; <i32> [#uses=1] %C = or i32 %B, 1234 ; <i32> [#uses=1] @@ -343,3 +343,101 @@ bb2: } +define i32 @test29(i64 %d18) { +entry: + %tmp916 = lshr i64 %d18, 32 + %tmp917 = trunc i64 %tmp916 to i32 + %tmp10 = lshr i32 %tmp917, 31 + ret i32 %tmp10 +; CHECK: @test29 +; CHECK: %tmp916 = lshr i64 %d18, 63 +; CHECK: %tmp10 = trunc i64 %tmp916 to i32 +} + + +define i32 @test30(i32 %A, i32 %B, i32 %C) { + %X = shl i32 %A, %C + %Y = shl i32 %B, %C + %Z = and i32 %X, %Y + ret i32 %Z +; CHECK: @test30 +; CHECK: %X1 = and i32 %A, %B +; CHECK: %Z = shl i32 %X1, %C +} + +define i32 @test31(i32 %A, i32 %B, i32 %C) { + %X = lshr i32 %A, %C + %Y = lshr i32 %B, %C + %Z = or i32 %X, %Y + ret i32 %Z +; CHECK: @test31 +; CHECK: %X1 = or i32 %A, %B +; CHECK: %Z = lshr i32 %X1, %C 
+} + +define i32 @test32(i32 %A, i32 %B, i32 %C) { + %X = ashr i32 %A, %C + %Y = ashr i32 %B, %C + %Z = xor i32 %X, %Y + ret i32 %Z +; CHECK: @test32 +; CHECK: %X1 = xor i32 %A, %B +; CHECK: %Z = ashr i32 %X1, %C +; CHECK: ret i32 %Z +} + +define i1 @test33(i32 %X) { + %tmp1 = shl i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test33 +; CHECK: %tmp1.mask = and i32 %X, 16777216 +; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0 +} + +define i1 @test34(i32 %X) { + %tmp1 = lshr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test34 +; CHECK: ret i1 false +} + +define i1 @test35(i32 %X) { + %tmp1 = ashr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test35 +; CHECK: %tmp2 = icmp slt i32 %X, 0 +; CHECK: ret i1 %tmp2 +} + +define i128 @test36(i128 %A, i128 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp23 = shl i128 %B, 64 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + ret i128 %tmp45 + +; CHECK: @test36 +; CHECK: %tmp231 = or i128 %B, %A +; CHECK: %ins = and i128 %tmp231, 18446744073709551615 +; CHECK: ret i128 %ins +} + +define i64 @test37(i128 %A, i32 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp22 = zext i32 %B to i128 + %tmp23 = shl i128 %tmp22, 96 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + %tmp46 = trunc i128 %tmp45 to i64 + ret i64 %tmp46 + +; CHECK: @test37 +; CHECK: %tmp23 = shl i128 %tmp22, 32 +; CHECK: %ins = or i128 %tmp23, %A +; CHECK: %tmp46 = trunc i128 %ins to i64 +} diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll new file mode 100644 index 0000000000000..69e511bfb3bd4 --- /dev/null +++ b/test/Transforms/InstCombine/sqrt.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -instcombine %s | FileCheck %s + +define float @test1(float %x) nounwind readnone ssp { +entry: +; CHECK: @test1 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call 
double @sqrt(double %conv) readnone nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} + +declare double @sqrt(double) + +; PR8096 +define float @test2(float %x) nounwind readnone ssp { +entry: +; CHECK: @test2 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call double @sqrt(double %conv) nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll deleted file mode 100644 index 93e3753cf5023..0000000000000 --- a/test/Transforms/InstCombine/trunc-mask-ext.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt < %s -instcombine -S > %t -; RUN: not grep zext %t -; RUN: not grep sext %t - -; Instcombine should be able to eliminate all of these ext casts. 
- -declare void @use(i32) - -define i64 @foo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 15 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @bar(i64 %a) { - %b = trunc i64 %a to i32 - %c = shl i32 %b, 4 - %q = ashr i32 %c, 4 - %d = sext i32 %q to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @goo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @hoo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %x = xor i32 %c, 8 - %d = zext i32 %x to i64 - call void @use(i32 %b) - ret i64 %d -} diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll new file mode 100644 index 0000000000000..f98bfd9236cd0 --- /dev/null +++ b/test/Transforms/InstCombine/trunc.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Instcombine should be able to eliminate all of these ext casts. 
+ +declare void @use(i32) + +define i64 @test1(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 15 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test1 +; CHECK: %d = and i64 %a, 15 +; CHECK: ret i64 %d +} +define i64 @test2(i64 %a) { + %b = trunc i64 %a to i32 + %c = shl i32 %b, 4 + %q = ashr i32 %c, 4 + %d = sext i32 %q to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test2 +; CHECK: shl i64 %a, 36 +; CHECK: %d = ashr i64 {{.*}}, 36 +; CHECK: ret i64 %d +} +define i64 @test3(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test3 +; CHECK: %d = and i64 %a, 8 +; CHECK: ret i64 %d +} +define i64 @test4(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %x = xor i32 %c, 8 + %d = zext i32 %x to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test4 +; CHECK: = and i64 %a, 8 +; CHECK: %d = xor i64 {{.*}}, 8 +; CHECK: ret i64 %d +} + +define i32 @test5(i32 %A) { + %B = zext i32 %A to i128 + %C = lshr i128 %B, 16 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test5 +; CHECK: %C = lshr i32 %A, 16 +; CHECK: ret i32 %C +} + +define i32 @test6(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test6 +; CHECK: %C = lshr i64 %A, 32 +; CHECK: %D = trunc i64 %C to i32 +; CHECK: ret i32 %D +} + +define i92 @test7(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i92 + ret i92 %D +; CHECK: @test7 +; CHECK: %B = zext i64 %A to i92 +; CHECK: %C = lshr i92 %B, 32 +; CHECK: ret i92 %C +} + +define i64 @test8(i32 %A, i32 %B) { + %tmp38 = zext i32 %A to i128 + %tmp32 = zext i32 %B to i128 + %tmp33 = shl i128 %tmp32, 32 + %ins35 = or i128 %tmp33, %tmp38 + %tmp42 = trunc i128 %ins35 to i64 + ret i64 %tmp42 +; CHECK: @test8 +; CHECK: %tmp38 = zext i32 %A to i64 +; CHECK: %tmp32 = zext i32 %B to i64 +; CHECK: %tmp33 = shl i64 %tmp32, 32 +; CHECK: %ins35 
= or i64 %tmp33, %tmp38 +; CHECK: ret i64 %ins35 +} + diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll index 7c2b4b01ca667..229f1a85e860c 100644 --- a/test/Transforms/InstCombine/urem-simplify-bug.ll +++ b/test/Transforms/InstCombine/urem-simplify-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5 } +; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5} @.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1] @.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1] diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll new file mode 100644 index 0000000000000..17a0aba2faefd --- /dev/null +++ b/test/Transforms/JumpThreading/2010-08-26-and.ll @@ -0,0 +1,162 @@ +; RUN: opt -jump-threading -enable-jump-threading-lvi -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%class.StringSwitch = type { i8*, i32, i32, i8 } + +@.str = private constant [4 x i8] c"red\00" ; <[4 x i8]*> [#uses=1] +@.str1 = private constant [7 x i8] c"orange\00" ; <[7 x i8]*> [#uses=1] +@.str2 = private constant [7 x i8] c"yellow\00" ; <[7 x i8]*> [#uses=1] +@.str3 = private constant [6 x i8] c"green\00" ; <[6 x i8]*> [#uses=1] +@.str4 = private constant [5 x i8] c"blue\00" ; <[5 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c"indigo\00" ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [7 x i8] c"violet\00" ; <[7 x i8]*> [#uses=1] +@.str7 = private constant [12 x i8] c"Color = %d\0A\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp142 = icmp sgt i32 %argc, 1 ; <i1> [#uses=1] + br i1 %cmp142, label %bb.nph, label %for.end + +bb.nph: ; preds = %entry + %tmp = add i32 
%argc, -2 ; <i32> [#uses=1] + %tmp144 = zext i32 %tmp to i64 ; <i64> [#uses=1] + %tmp145 = add i64 %tmp144, 1 ; <i64> [#uses=1] + br label %land.lhs.true.i + +land.lhs.true.i: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %bb.nph + %retval.0.i.pre161 = phi i32 [ undef, %bb.nph ], [ %retval.0.i.pre, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i32> [#uses=3] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp146, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i64> [#uses=1] + %tmp146 = add i64 %indvar, 1 ; <i64> [#uses=3] + %arrayidx = getelementptr i8** %argv, i64 %tmp146 ; <i8**> [#uses=1] + %tmp6 = load i8** %arrayidx, align 8 ; <i8*> [#uses=8] + %call.i.i = call i64 @strlen(i8* %tmp6) nounwind ; <i64> [#uses=1] + %conv.i.i = trunc i64 %call.i.i to i32 ; <i32> [#uses=6]\ +; CHECK: switch i32 %conv.i.i +; CHECK-NOT: if.then.i40 +; CHECK: } + switch i32 %conv.i.i, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit [ + i32 3, label %land.lhs.true5.i + i32 6, label %land.lhs.true5.i37 + ] + +land.lhs.true5.i: ; preds = %land.lhs.true.i + %call.i = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* %tmp6, i64 4) nounwind ; <i32> [#uses=1] + %cmp9.i = icmp eq i32 %call.i, 0 ; <i1> [#uses=1] + br i1 %cmp9.i, label %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit: ; preds = %land.lhs.true5.i + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i37: ; preds = %land.lhs.true.i + %call.i35 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str1, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i36 = icmp eq i32 %call.i35, 0 ; <i1> [#uses=1] + br i1 %cmp9.i36, label %if.then.i40, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +if.then.i40: ; preds = 
%land.lhs.true5.i37 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i40, %land.lhs.true5.i37, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, %land.lhs.true5.i, %land.lhs.true.i + %retval.0.i.pre159 = phi i32 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre161, %land.lhs.true5.i37 ], [ 2, %if.then.i40 ], [ %retval.0.i.pre161, %land.lhs.true5.i ], [ %retval.0.i.pre161, %land.lhs.true.i ] ; <i32> [#uses=2] + %tmp2.i44 = phi i8 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ 0, %land.lhs.true5.i37 ], [ 1, %if.then.i40 ], [ 0, %land.lhs.true5.i ], [ 0, %land.lhs.true.i ] ; <i8> [#uses=3] + %tobool.i46 = icmp eq i8 %tmp2.i44, 0 ; <i1> [#uses=1] + %cmp.i49 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.i46, %cmp.i49 ; <i1> [#uses=1] + br i1 %or.cond, label %land.lhs.true5.i55, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +land.lhs.true5.i55: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %call.i53 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i54 = icmp eq i32 %call.i53, 0 ; <i1> [#uses=1] + br i1 %cmp9.i54, label %if.then.i58, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +if.then.i58: ; preds = %land.lhs.true5.i55 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60: ; preds = %if.then.i58, %land.lhs.true5.i55, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre158 = phi i32 [ %retval.0.i.pre159, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre159, %land.lhs.true5.i55 ], [ 3, %if.then.i58 ] ; <i32> [#uses=2] + %tmp2.i63 = phi i8 [ %tmp2.i44, 
%_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i44, %land.lhs.true5.i55 ], [ 1, %if.then.i58 ] ; <i8> [#uses=3] + %tmp14.i64 = and i8 %tmp2.i63, 1 ; <i8> [#uses=1] + %tobool.i65 = icmp eq i8 %tmp14.i64, 0 ; <i1> [#uses=1] + %cmp.i68 = icmp eq i32 %conv.i.i, 5 ; <i1> [#uses=1] + %or.cond168 = and i1 %tobool.i65, %cmp.i68 ; <i1> [#uses=1] + br i1 %or.cond168, label %land.lhs.true5.i74, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i74: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %call.i72 = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i8* %tmp6, i64 6) nounwind ; <i32> [#uses=1] + %cmp9.i73 = icmp eq i32 %call.i72, 0 ; <i1> [#uses=1] + br i1 %cmp9.i73, label %if.then.i77, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +if.then.i77: ; preds = %land.lhs.true5.i74 + br label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i77, %land.lhs.true5.i74, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %retval.0.i.pre157 = phi i32 [ %retval.0.i.pre158, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %retval.0.i.pre158, %land.lhs.true5.i74 ], [ 4, %if.then.i77 ] ; <i32> [#uses=2] + %tmp2.i81 = phi i8 [ %tmp2.i63, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %tmp2.i63, %land.lhs.true5.i74 ], [ 1, %if.then.i77 ] ; <i8> [#uses=3] + %tmp14.i82 = and i8 %tmp2.i81, 1 ; <i8> [#uses=1] + %tobool.i83 = icmp eq i8 %tmp14.i82, 0 ; <i1> [#uses=1] + %cmp.i86 = icmp eq i32 %conv.i.i, 4 ; <i1> [#uses=1] + %or.cond169 = and i1 %tobool.i83, %cmp.i86 ; <i1> [#uses=1] + br i1 %or.cond169, label %land.lhs.true5.i92, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i92: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %call.i90 = call i32 @memcmp(i8* 
getelementptr inbounds ([5 x i8]* @.str4, i64 0, i64 0), i8* %tmp6, i64 5) nounwind ; <i32> [#uses=1] + %cmp9.i91 = icmp eq i32 %call.i90, 0 ; <i1> [#uses=1] + br i1 %cmp9.i91, label %if.then.i95, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +if.then.i95: ; preds = %land.lhs.true5.i92 + br label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i95, %land.lhs.true5.i92, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre156 = phi i32 [ %retval.0.i.pre157, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre157, %land.lhs.true5.i92 ], [ 5, %if.then.i95 ] ; <i32> [#uses=2] + %tmp2.i99 = phi i8 [ %tmp2.i81, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i81, %land.lhs.true5.i92 ], [ 1, %if.then.i95 ] ; <i8> [#uses=3] + %tmp14.i100 = and i8 %tmp2.i99, 1 ; <i8> [#uses=1] + %tobool.i101 = icmp eq i8 %tmp14.i100, 0 ; <i1> [#uses=1] + %cmp.i104 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond170 = and i1 %tobool.i101, %cmp.i104 ; <i1> [#uses=1] + br i1 %or.cond170, label %land.lhs.true5.i110, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +land.lhs.true5.i110: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %call.i108 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i109 = icmp eq i32 %call.i108, 0 ; <i1> [#uses=1] + br i1 %cmp9.i109, label %if.then.i113, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +if.then.i113: ; preds = %land.lhs.true5.i110 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115: ; preds = %if.then.i113, %land.lhs.true5.i110, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre155 = phi i32 [ 
%retval.0.i.pre156, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre156, %land.lhs.true5.i110 ], [ 6, %if.then.i113 ] ; <i32> [#uses=2] + %tmp2.i118 = phi i8 [ %tmp2.i99, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i99, %land.lhs.true5.i110 ], [ 1, %if.then.i113 ] ; <i8> [#uses=3] + %tmp14.i119 = and i8 %tmp2.i118, 1 ; <i8> [#uses=1] + %tobool.i120 = icmp eq i8 %tmp14.i119, 0 ; <i1> [#uses=1] + %cmp.i123 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond171 = and i1 %tobool.i120, %cmp.i123 ; <i1> [#uses=1] + br i1 %or.cond171, label %land.lhs.true5.i129, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +land.lhs.true5.i129: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %call.i127 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str6, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i128 = icmp eq i32 %call.i127, 0 ; <i1> [#uses=1] + br i1 %cmp9.i128, label %if.then.i132, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +if.then.i132: ; preds = %land.lhs.true5.i129 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134: ; preds = %if.then.i132, %land.lhs.true5.i129, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %retval.0.i.pre = phi i32 [ %retval.0.i.pre155, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %retval.0.i.pre155, %land.lhs.true5.i129 ], [ 7, %if.then.i132 ] ; <i32> [#uses=2] + %tmp2.i137 = phi i8 [ %tmp2.i118, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %tmp2.i118, %land.lhs.true5.i129 ], [ 1, %if.then.i132 ] ; <i8> [#uses=1] + %tmp7.i138 = and i8 %tmp2.i137, 1 ; <i8> [#uses=1] + %tobool.i139 = icmp eq i8 %tmp7.i138, 0 ; <i1> [#uses=1] + %retval.0.i = select i1 %tobool.i139, i32 0, i32 %retval.0.i.pre ; <i32> [#uses=1] + %call22 = call i32 (i8*, 
...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0] + %exitcond = icmp eq i64 %tmp146, %tmp145 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %land.lhs.true.i + +for.end: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %entry + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) nounwind readonly + +declare i64 @strlen(i8* nocapture) nounwind readonly diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll index 503d301892ee9..cd274e78c9fc3 100644 --- a/test/Transforms/JumpThreading/basic.ll +++ b/test/Transforms/JumpThreading/basic.ll @@ -147,11 +147,17 @@ define i32 @test6(i32 %A) { ; CHECK: @test6 %tmp455 = icmp eq i32 %A, 42 br i1 %tmp455, label %BB1, label %BB2 - -BB2: + +; CHECK: call i32 @f2() +; CHECK-NEXT: ret i32 3 + ; CHECK: call i32 @f1() -; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 4 +; CHECK-NOT: br +; CHECK: call void @f3() +; CHECK-NOT: br +; CHECK: ret i32 4 + +BB2: call i32 @f1() br label %BB1 @@ -415,4 +421,58 @@ F2: ; CHECK-NEXT: br i1 %N, label %T2, label %F2 } +; CHECK: @test14 +define i32 @test14(i32 %in) { +entry: + %A = icmp eq i32 %in, 0 +; CHECK: br i1 %A, label %right_ret, label %merge + br i1 %A, label %left, label %right + +; CHECK-NOT: left: +left: + br label %merge + +; CHECK-NOT: right: +right: + %B = call i32 @f1() + br label %merge + +merge: +; CHECK-NOT: %C = phi i32 [%in, %left], [%B, %right] + %C = phi i32 [%in, %left], [%B, %right] + %D = add i32 %C, 1 + %E = icmp eq i32 %D, 2 + br i1 %E, label %left_ret, label %right_ret + +; CHECK: left_ret: +left_ret: + ret i32 0 + +right_ret: + ret i32 1 +} + +; PR5652 +; CHECK: @test15 +define i32 @test15(i32 %len) { +entry: +; CHECK: icmp ult i32 %len, 13 + %tmp = icmp ult i32 %len, 13 + br i1 %tmp, label %check, label %exit0 + +exit0: + ret i32 0 + +check: + %tmp9 = icmp ult 
i32 %len, 21 + br i1 %tmp9, label %exit1, label %exit2 + +exit2: +; CHECK-NOT: ret i32 2 + ret i32 2 + +exit1: + ret i32 1 +; CHECK: } +} diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll index f0fc61e7370f3..751bc6518a1ab 100644 --- a/test/Transforms/JumpThreading/crash.ll +++ b/test/Transforms/JumpThreading/crash.ll @@ -216,6 +216,9 @@ bb61: ; PR5698 define void @test7(i32 %x) { +entry: + br label %tailrecurse + tailrecurse: switch i32 %x, label %return [ i32 2, label %bb2 @@ -433,4 +436,51 @@ for.cond1040: ; preds = %for.body1044, %for. ret void } +; PR7755 +define void @test16(i1 %c, i1 %c2, i1 %c3, i1 %c4) nounwind ssp { +entry: + %cmp = icmp sgt i32 undef, 1 ; <i1> [#uses=1] + br i1 %c, label %land.end, label %land.rhs + +land.rhs: ; preds = %entry + br i1 %c2, label %lor.lhs.false.i, label %land.end + +lor.lhs.false.i: ; preds = %land.rhs + br i1 %c3, label %land.end, label %land.end + +land.end: + %0 = phi i1 [ true, %entry ], [ false, %land.rhs ], [false, %lor.lhs.false.i], [false, %lor.lhs.false.i] ; <i1> [#uses=1] + %cmp12 = and i1 %cmp, %0 + %xor1 = xor i1 %cmp12, %c4 + br i1 %xor1, label %if.then, label %if.end + +if.then: + ret void + +if.end: + ret void +} + +define void @test17() { +entry: + br i1 undef, label %bb269.us.us, label %bb269.us.us.us + +bb269.us.us.us: + %indvar = phi i64 [ %indvar.next, %bb287.us.us.us ], [ 0, %entry ] + %0 = icmp eq i16 undef, 0 + br i1 %0, label %bb287.us.us.us, label %bb286.us.us.us + +bb287.us.us.us: + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 4 + br i1 %exitcond, label %bb288.bb289.loopexit_crit_edge, label %bb269.us.us.us +bb286.us.us.us: + unreachable + +bb269.us.us: + unreachable + +bb288.bb289.loopexit_crit_edge: + unreachable +} diff --git a/test/Transforms/JumpThreading/lvi-load.ll b/test/Transforms/JumpThreading/lvi-load.ll new file mode 100644 index 0000000000000..0bf4187d544b9 --- /dev/null +++ 
b/test/Transforms/JumpThreading/lvi-load.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -jump-threading -enable-jump-threading-lvi -dce < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.4" + +%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* } +%"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" = type { i64 } +%"struct.llvm::Type" = type opaque +%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" } +%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* } +%"struct.llvm::ValueName" = type opaque + +@_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__ = internal constant [5 x i8] c"cast\00", align 8 ; <[5 x i8]*> [#uses=1] +@.str = private constant [31 x i8] c"include/llvm/Support/Casting.h\00", align 8 ; <[31 x i8]*> [#uses=1] +@.str1 = private constant [59 x i8] c"isa<X>(Val) && \22cast<Ty>() argument of incompatible type!\22\00", align 8 ; <[59 x i8]*> [#uses=1] + +; CHECK: Z3fooPN4llvm5ValueE +define zeroext i8 @_Z3fooPN4llvm5ValueE(%"struct.llvm::Value"* %V) ssp { +entry: + %0 = getelementptr inbounds %"struct.llvm::Value"* %V, i64 0, i32 1 ; <i8*> [#uses=1] + %1 = load i8* %0, align 8 ; <i8> [#uses=2] + %2 = icmp ugt i8 %1, 20 ; <i1> [#uses=1] + br i1 %2, label %bb.i, label %bb2 + +bb.i: ; preds = %entry + %toBoolnot.i.i = icmp ult i8 %1, 21 ; <i1> [#uses=1] + br i1 %toBoolnot.i.i, label %bb6.i.i, label %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + +; CHECK-NOT: assert +bb6.i.i: ; preds = %bb.i + tail call void @__assert_rtn(i8* 
getelementptr inbounds ([5 x i8]* @_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str, i64 0, i64 0), i32 202, i8* getelementptr inbounds ([59 x i8]* @.str1, i64 0, i64 0)) noreturn + unreachable + +_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %bb.i +; CHECK-NOT: null + %3 = icmp eq %"struct.llvm::Value"* %V, null ; <i1> [#uses=1] + br i1 %3, label %bb2, label %bb + +bb: ; preds = %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + tail call void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"* %V) +; CHECK: ret + ret i8 1 + +bb2: ; preds = %entry, %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + ret i8 0 +} + +declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn + +declare void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"*) diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll index 7545641f1aee6..5381c88aea630 100644 --- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll +++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry} -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry} %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* } diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll new file mode 100644 index 0000000000000..88be5c41ccc5a --- /dev/null +++ b/test/Transforms/LICM/crash.ll @@ -0,0 +1,61 @@ +; RUN: opt -licm %s -disable-output + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + + +; PR8068 +@g_12 = external global i8, align 1 +define void @test1() nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.cond, %bb.nph + store i8 0, i8* @g_12, align 1 + %tmp6 = load i8* @g_12, align 1 + br label %for.cond + +for.cond: ; preds = %for.body + store i8 %tmp6, i8* @g_12, align 1 + br i1 false, label %for.cond.for.end10_crit_edge, label %for.body + +for.cond.for.end10_crit_edge: ; preds = %for.cond + br label %for.end10 + +for.end10: ; preds = %for.cond.for.end10_crit_edge, %entry + ret void +} + +; PR8067 +@g_8 = external global i32, align 4 + +define void @test2() noreturn nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %tmp7 = load i32* @g_8, align 4 + store i32* @g_8, i32** undef, align 16 + store i32 undef, i32* @g_8, align 4 + br label %for.body +} + +; PR8102 +define void @test3() { +entry: + %__first = alloca { i32* } + br i1 undef, label %for.cond, label %for.end + +for.cond: ; preds = %for.cond, %entry + %tmp1 = getelementptr { i32*}* %__first, i32 0, i32 0 + %tmp2 = load i32** %tmp1, align 4 + %call = tail call i32* @test3helper(i32* %tmp2) + %tmp3 = getelementptr { i32*}* %__first, i32 0, i32 0 + store i32* %call, i32** %tmp3, align 4 + br i1 false, label %for.cond, label %for.end + +for.end: ; preds = %for.cond, %entry + ret void +} + +declare i32* @test3helper(i32*) diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index e7d36afb91b15..6f28d53af66ea 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -48,3 +48,19 @@ Out: ; preds = %Loop %C = sub i32 %A, %B ; <i32> [#uses=1] ret i32 %C } + + +; This loop invariant instruction should be constant folded, not hoisted. 
+define i32 @test3(i1 %c) { +; CHECK: define i32 @test3 +; CHECK: call void @foo2(i32 6) + %A = load i32* @X ; <i32> [#uses=2] + br label %Loop +Loop: + %B = add i32 4, 2 ; <i32> [#uses=2] + call void @foo2( i32 %B ) + br i1 %c, label %Loop, label %Out +Out: ; preds = %Loop + %C = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %C +} diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index ef28c38ca6071..c1d2b24b0bba5 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -licm -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + @X = global i32 7 ; <i32*> [#uses=4] define void @test1(i32 %i) { @@ -32,23 +34,21 @@ Entry: br label %Loop ; CHECK: @test2 ; CHECK: Entry: -; CHECK-NEXT: %X1 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X2 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X1.promoted = load i32* %X1 +; CHECK-NEXT: %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 - %X1 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] %A = load i32* %X1 ; <i32> [#uses=1] %V = add i32 %A, 1 ; <i32> [#uses=1] - %X2 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] store i32 %V, i32* %X2 br i1 false, label %Loop, label %Exit Exit: ; preds = %Loop ret void ; CHECK: Exit: -; CHECK-NEXT: store i32 %V, i32* %X1 +; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: ret void } @@ -71,3 +71,50 @@ Out: ; preds = %Loop ret void } +; PR8041 +define void @test4(i8* %x, i8 %n) { +; CHECK: @test4 + %handle1 = alloca i8* + %handle2 = alloca i8* + store i8* %x, i8** %handle1 + br label %loop + +loop: + %tmp = getelementptr i8* %x, i64 8 + store i8* %tmp, i8** %handle2 + br 
label %subloop + +subloop: + %count = phi i8 [ 0, %loop ], [ %nextcount, %subloop ] + %offsetx2 = load i8** %handle2 + store i8 %n, i8* %offsetx2 + %newoffsetx2 = getelementptr i8* %offsetx2, i64 -1 + store i8* %newoffsetx2, i8** %handle2 + %nextcount = add i8 %count, 1 + %innerexitcond = icmp sge i8 %nextcount, 8 + br i1 %innerexitcond, label %innerexit, label %subloop + +; Should have promoted 'handle2' accesses. +; CHECK: subloop: +; CHECK-NEXT: phi i8* [ +; CHECK-NEXT: %count = phi i8 [ +; CHECK-NEXT: store i8 %n +; CHECK-NOT: store +; CHECK: br i1 + +innerexit: + %offsetx1 = load i8** %handle1 + %val = load i8* %offsetx1 + %cond = icmp eq i8 %val, %n + br i1 %cond, label %exit, label %loop + +; Should not have promoted offsetx1 loads. +; CHECK: innerexit: +; CHECK: %val = load i8* %offsetx1 +; CHECK: %cond = icmp eq i8 %val, %n +; CHECK: br i1 %cond, label %exit, label %loop + +exit: + ret void +} + diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index 11112eb74443f..68e4b64bf9bf6 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -233,3 +233,17 @@ Out: ; preds = %Loop ; CHECK-NEXT: ret i32 %tmp.6 } +; Should delete, not sink, dead instructions. +define void @test11() { + br label %Loop +Loop: + %dead = getelementptr %Ty* @X2, i64 0, i32 0 + br i1 false, label %Loop, label %Out +Out: + ret void +; CHECK: @test11 +; CHECK: Out: +; CHECK-NEXT: ret void +} + + diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll index 9a64e2a9a8303..5403e723ee155 100644 --- a/test/Transforms/LoopRotate/phi-duplicate.ll +++ b/test/Transforms/LoopRotate/phi-duplicate.ll @@ -27,9 +27,21 @@ for.body: ; preds = %for.cond for.end: ; preds = %for.cond ret void } -; Should only end up with one phi. -; CHECK: for.body: -; CHECK-NEXT: %j.02 = phi i64 -; CHECK-NOT: phi -; CHECK: ret void +; Should only end up with one phi. 
Also, the original for.cond block should +; be moved to the end of the loop so that the new loop header pleasantly +; ends up at the top. + +; CHECK: define void @test +; CHECK-NEXT: entry: +; CHECK-NEXT: icmp slt i64 +; CHECK-NEXT: br i1 +; CHECK-NOT: : +; CHECK: bb.nph: +; CHECK-NEXT: br label %for.body +; CHECK-NOT: : +; CHECK: for.body: +; CHECK-NEXT: %j.02 = phi i64 +; CHECK-NOT: phi +; CHECK: ret void +; CHECK-NEXT: } diff --git a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll new file mode 100644 index 0000000000000..2a1ee7d1a72f8 --- /dev/null +++ b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -domfrontier -loopsimplify -domfrontier -verify-dom-info -analyze + + +define void @a() nounwind { +entry: + br i1 undef, label %bb37, label %bb1.i + +bb1.i: ; preds = %bb1.i, %bb + %indvar = phi i64 [ %indvar.next, %bb1.i ], [ 0, %entry ] ; <i64> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 576 ; <i1> [#uses=1] + br i1 %exitcond, label %bb37, label %bb1.i + +bb37: ; preds = %bb1.i, %bb + br label %return + + +return: ; preds = %bb39 + ret void +} diff --git a/test/Transforms/LoopSimplify/indirectbr-backedge.ll b/test/Transforms/LoopSimplify/indirectbr-backedge.ll new file mode 100644 index 0000000000000..ca6e47fcecd3c --- /dev/null +++ b/test/Transforms/LoopSimplify/indirectbr-backedge.ll @@ -0,0 +1,35 @@ +; RUN: opt -loopsimplify -S < %s | FileCheck %s + +; LoopSimplify shouldn't split loop backedges that use indirectbr. 
+ +; CHECK: bb1: ; preds = %bb5, %bb +; CHECK-NEXT: indirectbr + +; CHECK: bb5: ; preds = %bb1 +; CHECK-NEXT: br label %bb1{{$}} + +define void @foo(i8* %p) nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb1, %bb + indirectbr i8* %p, [label %bb6, label %bb7, label %bb1, label %bb2, label %bb3, label %bb5, label %bb4] + +bb2: ; preds = %bb1 + ret void + +bb3: ; preds = %bb1 + ret void + +bb4: ; preds = %bb1 + ret void + +bb5: ; preds = %bb1 + br label %bb1 + +bb6: ; preds = %bb1 + ret void + +bb7: ; preds = %bb1 + ret void +} diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll new file mode 100644 index 0000000000000..017a0d2108498 --- /dev/null +++ b/test/Transforms/LoopSimplify/preserve-scev.ll @@ -0,0 +1,50 @@ +; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>} +; PR8079 + +; LoopSimplify should invalidate indvars when splitting out the +; inner loop. + +@maxStat = external global i32 + +define i32 @test() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.then5, %if.end, %entry + %cuts.1 = phi i32 [ 0, %entry ], [ %inc, %if.then5 ], [ %cuts.1, %if.end ] + %0 = phi i32 [ 0, %entry ], [ %add, %if.end ], [ %add, %if.then5 ] + %add = add i32 %0, 1 + %cmp = icmp slt i32 %0, 1 + %tmp1 = load i32* @maxStat, align 4 + br i1 %cmp, label %for.body, label %for.cond14.preheader + +for.cond14.preheader: ; preds = %for.cond + %cmp1726 = icmp sgt i32 %tmp1, 0 + br i1 %cmp1726, label %for.body18, label %return + +for.body: ; preds = %for.cond + %cmp2 = icmp sgt i32 %tmp1, 100 + br i1 %cmp2, label %return, label %if.end + +if.end: ; preds = %for.body + %cmp4 = icmp sgt i32 %tmp1, -1 + br i1 %cmp4, label %if.then5, label %for.cond + +if.then5: ; preds = %if.end + call void @foo() nounwind + %inc = add i32 %cuts.1, 1 + br label %for.cond + +for.body18: ; preds = %for.body18, %for.cond14.preheader + %i13.027 = phi i32 [ %1, %for.body18 
], [ 0, %for.cond14.preheader ] + call void @foo() nounwind + %1 = add nsw i32 %i13.027, 1 + %tmp16 = load i32* @maxStat, align 4 + %cmp17 = icmp slt i32 %1, %tmp16 + br i1 %cmp17, label %for.body18, label %return + +return: ; preds = %for.body18, %for.body, %for.cond14.preheader + ret i32 0 +} + +declare void @foo() nounwind diff --git a/test/Transforms/LoopStrengthReduce/pr3571.ll b/test/Transforms/LoopStrengthReduce/pr3571.ll index 9ad27d5ff1146..a23e4db497053 100644 --- a/test/Transforms/LoopStrengthReduce/pr3571.ll +++ b/test/Transforms/LoopStrengthReduce/pr3571.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-reduce | llvm-dis ; PR3571 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define void @_ZNK18qdesigner_internal10TreeWidget12drawBranchesEP8QPainterRK5QRectRK11QModelIndex() nounwind { entry: br label %_ZNK11QModelIndex7isValidEv.exit.i diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll index dca97e9ad187e..8af5cf1dfd726 100644 --- a/test/Transforms/LoopStrengthReduce/uglygep.ll +++ b/test/Transforms/LoopStrengthReduce/uglygep.ll @@ -4,7 +4,6 @@ ; should be able to form pretty GEPs. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" define void @Z4() nounwind { bb: diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll new file mode 100644 index 0000000000000..73391ca8d19d3 --- /dev/null +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -0,0 +1,53 @@ +; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s +; PR5373 + +; Loop unswitching shouldn't trivially unswitch the true case of condition %a +; in the code here because it leads to an infinite loop. 
While this doesn't +; contain any instructions with side effects, it's still a kind of side effect. +; It can trivially unswitch on the false cas of condition %a though. + +; STATS: 2 loop-unswitch - Number of branches unswitched +; STATS: 1 loop-unswitch - Number of unswitches that are trivial + +; CHECK: @func_16 +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split + +; CHECK: entry.split: +; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1 + +; CHECK: cond.end.us: +; CHECK-NEXT: br label %cond.end.us + +; CHECK: abort0.split: +; CHECK-NEXT: call void @end0() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: abort1: +; CHECK-NEXT: call void @end1() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: } + +define void @func_16(i1 %a, i1 %b) nounwind { +entry: + br label %for.body + +for.body: + br i1 %a, label %cond.end, label %abort0 + +cond.end: + br i1 %b, label %for.body, label %abort1 + +abort0: + call void @end0() noreturn nounwind + unreachable + +abort1: + call void @end1() noreturn nounwind + unreachable +} + +declare void @end0() noreturn +declare void @end1() noreturn diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll new file mode 100644 index 0000000000000..5b110d6b7eba4 --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-load.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.load.add.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.min.i8.p0i8(i8* %ptr, i8 %delta) + +define i8 @add() { +; CHECK: @add + %i = alloca i8 + %j = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: add +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @nand() { +; CHECK: @nand + %i = alloca i8 + %j = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* %i, i8 42) +; CHECK: 
[[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: and +; CHECK-NEXT: xor +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @min() { +; CHECK: @min + %i = alloca i8 + %j = call i8 @llvm.atomic.load.min.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll new file mode 100644 index 0000000000000..0a59c8595e6a0 --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-swap.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %ptr, i8 %cmp, i8 %val) +declare i8 @llvm.atomic.swap.i8.p0i8(i8* %ptr, i8 %val) + +define i8 @cmpswap() { +; CHECK: @cmpswap + %i = alloca i8 + %j = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %i, i8 0, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @swap() { +; CHECK: @swap + %i = alloca i8 + %j = call i8 @llvm.atomic.swap.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/barrier.ll b/test/Transforms/LowerAtomic/barrier.ll new file mode 100644 index 0000000000000..218c5ba8d18e8 --- /dev/null +++ b/test/Transforms/LowerAtomic/barrier.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare void @llvm.memory.barrier(i1 %ll, i1 %ls, i1 %sl, i1 %ss, i1 %device) + +define void @barrier() { +; CHECK: @barrier + call void @llvm.memory.barrier(i1 0, i1 0, i1 0, i1 0, i1 0) +; CHECK-NEXT: ret + ret void +} diff --git a/test/Transforms/SSI/dg.exp b/test/Transforms/LowerAtomic/dg.exp index f2005891a59a8..f2005891a59a8 100644 --- a/test/Transforms/SSI/dg.exp +++ b/test/Transforms/LowerAtomic/dg.exp diff --git 
a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll new file mode 100644 index 0000000000000..dc64a0858ba84 --- /dev/null +++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll @@ -0,0 +1,18 @@ +; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged +; This used to crash with an assert. + +define <2 x i8> @v1(<2 x i8> %x) { + ret <2 x i8> %x +} + +define <4 x i8> @v2(<4 x i8> %x) { + ret <4 x i8> %x +} + +define [2 x i8] @a1([2 x i8] %x) { + ret [2 x i8] %x +} + +define [4 x i8] @a2([4 x i8] %x) { + ret [4 x i8] %x +} diff --git a/test/Transforms/PartialSpecialize/two-specializations.ll b/test/Transforms/PartialSpecialize/two-specializations.ll index c85ddb78dd1a7..bc3da22e1855f 100644 --- a/test/Transforms/PartialSpecialize/two-specializations.ll +++ b/test/Transforms/PartialSpecialize/two-specializations.ll @@ -1,8 +1,8 @@ ; If there are two specializations of a function, make sure each callsite ; calls the right one. ; -; RN: opt -S -partialspecialization %s | FileCheck %s -; RUN: true +; RUN: opt -S -partialspecialization -disable-inlining %s | opt -S -inline | FileCheck %s -check-prefix=CORRECT +; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s declare void @callback1() declare void @callback2() @@ -14,14 +14,18 @@ define internal void @UseCallback(void()* %pCallback) { define void @foo(void()* %pNonConstCallback) { Entry: +; CORRECT: Entry +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void %pNonConstCallback() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void @callback2() ; CHECK: Entry -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; CHECK-NEXT: call void %pNonConstCallback() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; 
CHECK-NEXT: call void @callback2() +; CHECK-NOT: call void @UseCallback(void ()* @callback1) +; CHECK-NOT: call void @UseCallback(void ()* @callback2) +; CHECK: ret void call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback2) diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll new file mode 100644 index 0000000000000..c6572fa5d141b --- /dev/null +++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll @@ -0,0 +1,28 @@ +; RUN: opt %s -ipsccp -S | FileCheck %s +; PR7876 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define internal i32 @foo() nounwind noinline ssp { +entry: + ret i32 0 +; CHECK: @foo +; CHECK: entry: +; CHECK: ret i32 0 +} + +declare i32 @bar() + +define internal i32 @test(i32 %c) nounwind noinline ssp { +bb: + %tmp1 = icmp ne i32 %c, 0 ; <i1> [#uses=1] + %tmp2 = select i1 %tmp1, i32 ()* @foo, i32 ()* @bar ; <i32 ()*> [#uses=1] + %tmp3 = tail call i32 %tmp2() nounwind ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @main() nounwind ssp { +bb: + %tmp = tail call i32 @test(i32 1) ; <i32> [#uses=1] + ret i32 %tmp +} diff --git a/test/Transforms/SSI/2009-07-09-Invoke.ll b/test/Transforms/SSI/2009-07-09-Invoke.ll deleted file mode 100644 index 20a22172806ea..0000000000000 --- a/test/Transforms/SSI/2009-07-09-Invoke.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output -; PR4511 - - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, 
std::allocator<char> > > >::_Vector_impl" } - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } - %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } - -declare void @_Unwind_Resume(i8*) - -declare fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*) - -define fastcc void 
@_ZNSt6vectorISsSaISsEE9push_backERKSs(%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >"* nocapture %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* nocapture %__x) { -entry: - br i1 undef, label %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i, label %bb - -bb: ; preds = %entry - ret void - -_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i: ; preds = %entry - %0 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef) - to label %invcont14.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=3] - -invcont14.i: ; preds = %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %1 = icmp eq %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, null ; <i1> [#uses=1] - br i1 %1, label %bb19.i, label %bb.i17.i - -bb.i17.i: ; preds = %invcont14.i - %2 = invoke fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* undef, i32 0) - to label %bb2.i25.i unwind label %ppad.i.i.i23.i ; <i8*> [#uses=0] - -ppad.i.i.i23.i: ; preds = %bb.i17.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i24.i unwind label %lpad.i29.i - -.noexc.i24.i: ; preds = %ppad.i.i.i23.i - unreachable - -bb2.i25.i: ; preds = %bb.i17.i - unreachable - -lpad.i29.i: ; preds = %ppad.i.i.i23.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i9 unwind label %ppad81.i - -.noexc.i9: ; preds = 
%lpad.i29.i - unreachable - -bb19.i: ; preds = %invcont14.i - %3 = getelementptr %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, i32 1 ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=2] - %4 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %3) - to label %invcont20.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - -invcont20.i: ; preds = %bb19.i - unreachable - -invcont32.i: ; preds = %ppad81.i - unreachable - -ppad81.i: ; preds = %bb19.i, %lpad.i29.i, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %__new_finish.0.i = phi %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* [ %0, %lpad.i29.i ], [ undef, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i ], [ %3, %bb19.i ] ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - br i1 undef, label %invcont32.i, label %bb.i.i.i.i - -bb.i.i.i.i: ; preds = %bb.i.i.i.i, %ppad81.i - br label %bb.i.i.i.i -} - -declare fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* nocapture, i32) diff --git a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll deleted file mode 100644 index 0fe37ec74098d..0000000000000 --- a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -declare fastcc i32 @ras_Empty(i8** nocapture) nounwind readonly - -define i32 
@cc_Tautology() nounwind { -entry: - unreachable - -cc_InitData.exit: ; No predecessors! - %0 = call fastcc i32 @ras_Empty(i8** undef) nounwind ; <i32> [#uses=1] - %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] - br i1 %1, label %bb2, label %bb6 - -bb2: ; preds = %cc_InitData.exit - unreachable - -bb6: ; preds = %cc_InitData.exit - ret i32 undef -} diff --git a/test/Transforms/SSI/2009-08-17-CritEdge.ll b/test/Transforms/SSI/2009-08-17-CritEdge.ll deleted file mode 100644 index 61bd2dc693f4f..0000000000000 --- a/test/Transforms/SSI/2009-08-17-CritEdge.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @test(i32 %x) { -entry: - br label %label1 -label1: - %A = phi i32 [ 0, %entry ], [ %A.1, %label2 ] - %B = icmp slt i32 %A, %x - br i1 %B, label %label2, label %label2 -label2: - %A.1 = add i32 %A, 1 - br label %label1 -label3: ; No predecessors! - ret void -} diff --git a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll deleted file mode 100644 index 64bed191def03..0000000000000 --- a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @foo() { -entry: - %tmp0 = load i64* undef, align 4 ; <i64> [#uses=3] - br i1 undef, label %end_stmt_playback, label %bb16 - -readJournalHdr.exit: ; No predecessors! 
- br label %end_stmt_playback - -bb16: ; preds = %bb7 - %tmp1 = icmp slt i64 0, %tmp0 ; <i1> [#uses=1] - br i1 %tmp1, label %bb16, label %bb17 - -bb17: ; preds = %bb16 - store i64 %tmp0, i64* undef, align 4 - br label %end_stmt_playback - -end_stmt_playback: ; preds = %bb17, %readJournalHdr.exit, %bb6, %bb2 - store i64 %tmp0, i64* undef, align 4 - ret void -} diff --git a/test/Transforms/SSI/ssiphi.ll b/test/Transforms/SSI/ssiphi.ll deleted file mode 100644 index a42b70c3c0211..0000000000000 --- a/test/Transforms/SSI/ssiphi.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -S | FileCheck %s - -declare void @use(i32) -declare i32 @create() - -define i32 @foo() { -entry: - %x = call i32 @create() - %y = icmp slt i32 %x, 10 - br i1 %y, label %T, label %F -T: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -F: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -join: -; CHECK: SSI_phi - ret i32 %x -} diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll index 4f875b0841b25..fe55426b24a19 100644 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ b/test/Transforms/ScalarRepl/vector_promote.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>} +; RUN: opt < %s -scalarrepl -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-apple-darwin10.0.0" -define void @test(<4 x float>* %F, float %f) { +define void @test1(<4 x float>* %F, float %f) { entry: %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] @@ -14,6 +14,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test1 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 
x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0 } define void @test2(<4 x float>* %F, float %f) { @@ -28,6 +33,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test2 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2 } define void @test3(<4 x float>* %F, float* %f) { @@ -40,6 +50,11 @@ entry: %tmp.upgrd.4 = load float* %tmp.upgrd.3 ; <float> [#uses=1] store float %tmp.upgrd.4, float* %f ret void +; CHECK: @test3 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2 } define void @test4(<4 x float>* %F, float* %f) { @@ -52,6 +67,11 @@ entry: %tmp.upgrd.6 = load float* %G.upgrd.5 ; <float> [#uses=1] store float %tmp.upgrd.6, float* %f ret void +; CHECK: @test4 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0 } define i32 @test5(float %X) { ;; should turn into bitcast. @@ -61,5 +81,22 @@ define i32 @test5(float %X) { ;; should turn into bitcast. %a = bitcast float* %X1 to i32* %tmp = load i32* %a ret i32 %tmp +; CHECK: @test5 +; CHECK-NEXT: bitcast float %X to i32 +; CHECK-NEXT: ret i32 +} + + +;; should not turn into <1 x i64> - It is a banned MMX datatype. 
+;; rdar://8380055 +define i64 @test6(<2 x float> %X) { + %X_addr = alloca <2 x float> + store <2 x float> %X, <2 x float>* %X_addr + %P = bitcast <2 x float>* %X_addr to i64* + %tmp = load i64* %P + ret i64 %tmp +; CHECK: @test6 +; CHECK-NEXT: bitcast <2 x float> %X to i64 +; CHECK-NEXT: ret i64 } diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll index ba33d84f84aa2..9c15efccd275c 100644 --- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll +++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -simplifycfg -disable-output ; PR2256 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-mingw32" +target triple = "x86_64-pc-mingw32" define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval %Z, i1 %cond) nounwind { bb: ; preds = %entry diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll index 83a9fa7ad1b8d..7315ff66bd127 100644 --- a/test/Transforms/SimplifyCFG/basictest.ll +++ b/test/Transforms/SimplifyCFG/basictest.ll @@ -54,6 +54,5 @@ bb1: ; preds = %entry return: ; preds = %entry ret void ; CHECK: @test5 -; CHECK-NEXT: bb: ; CHECK-NEXT: ret void } diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll new file mode 100644 index 0000000000000..de4f5b6075511 --- /dev/null +++ b/test/Transforms/SimplifyCFG/indirectbr.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s + +; SimplifyCFG should eliminate redundant indirectbr edges. 
+ +; CHECK: indbrtest0 +; CHECK: indirectbr i8* %t, [label %BB0, label %BB1, label %BB2] +; CHECK: %x = phi i32 [ 0, %BB0 ], [ 1, %entry ] + +declare void @foo() +declare void @A() +declare void @B(i32) +declare void @C() + +define void @indbrtest0(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest0, %BB0), i8** %P + store i8* blockaddress(@indbrtest0, %BB1), i8** %P + store i8* blockaddress(@indbrtest0, %BB2), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2] +BB0: + call void @A() + br label %BB1 +BB1: + %x = phi i32 [ 0, %BB0 ], [ 1, %entry ], [ 1, %entry ] + call void @B(i32 %x) + ret void +BB2: + call void @C() + ret void +} + +; SimplifyCFG should convert the indirectbr into a directbr. It would be even +; better if it removed the branch altogether, but simplifycfdg currently misses +; that because the predecessor is the entry block. + +; CHECK: indbrtest1 +; CHECK: br label %BB0 + +define void @indbrtest1(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest1, %BB0), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + call void @A() + ret void +} + +; SimplifyCFG should notice that BB0 does not have its address taken and +; remove it from entry's successor list. 
+ +; CHECK: indbrtest2 +; CHECK: entry: +; CHECK-NEXT: unreachable + +define void @indbrtest2(i8* %t) { +entry: + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + ret void +} diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll new file mode 100644 index 0000000000000..3965c37822764 --- /dev/null +++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll @@ -0,0 +1,19 @@ +; RUN: opt -strip-dead-debug-info -disable-output %s +define i32 @foo() nounwind ssp { +entry: + ret i32 0, !dbg !8 +} + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!6} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !"clang version 2.8 (trunk 112062)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ] +!7 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ] +!8 = metadata !{i32 3, i32 13, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git 
a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll index b2a9ed2813d62..9475f87e8f5b2 100644 --- a/test/Transforms/TailCallElim/accum_recursion.ll +++ b/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,15 +1,74 @@ -; RUN: opt < %s -tailcallelim -S | not grep call +; RUN: opt < %s -tailcallelim -S | FileCheck %s -define i32 @factorial(i32 %x) { +define i32 @test1_factorial(i32 %x) { entry: %tmp.1 = icmp sgt i32 %x, 0 ; <i1> [#uses=1] br i1 %tmp.1, label %then, label %else then: ; preds = %entry %tmp.6 = add i32 %x, -1 ; <i32> [#uses=1] - %tmp.4 = call i32 @factorial( i32 %tmp.6 ) ; <i32> [#uses=1] + %tmp.4 = call i32 @test1_factorial( i32 %tmp.6 ) ; <i32> [#uses=1] %tmp.7 = mul i32 %tmp.4, %x ; <i32> [#uses=1] ret i32 %tmp.7 else: ; preds = %entry ret i32 1 } +; CHECK: define i32 @test1_factorial +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: else: + +; This is a more aggressive form of accumulator recursion insertion, which +; requires noticing that X doesn't change as we perform the tailcall. 
+ +define i32 @test2_mul(i32 %x, i32 %y) { +entry: + %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + br i1 %tmp.1, label %return, label %endif +endif: ; preds = %entry + %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] + %tmp.5 = call i32 @test2_mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] + %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] + ret i32 %tmp.9 +return: ; preds = %entry + ret i32 %x +} + +; CHECK: define i32 @test2_mul +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: return: + + +define i64 @test3_fib(i64 %n) nounwind readnone { +; CHECK: @test3_fib +entry: +; CHECK: tailrecurse: +; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] +; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] + switch i64 %n, label %bb1 [ +; CHECK: switch i64 %n.tr, label %bb1 [ + i64 0, label %bb2 + i64 1, label %bb2 + ] + +bb1: +; CHECK: bb1: + %0 = add i64 %n, -1 +; CHECK: %0 = add i64 %n.tr, -1 + %1 = tail call i64 @test3_fib(i64 %0) nounwind +; CHECK: %1 = tail call i64 @test3_fib(i64 %0) + %2 = add i64 %n, -2 +; CHECK: %2 = add i64 %n.tr, -2 + %3 = tail call i64 @test3_fib(i64 %2) nounwind +; CHECK-NOT: tail call i64 @test3_fib + %4 = add nsw i64 %3, %1 +; CHECK: add nsw i64 %accumulator.tr, %1 + ret i64 %4 +; CHECK: br label %tailrecurse + +bb2: +; CHECK: bb2: + ret i64 %n +; CHECK: ret i64 %accumulator.tr +} diff --git a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll deleted file mode 100644 index 2a90cf3b22d73..0000000000000 --- a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll +++ /dev/null @@ -1,20 +0,0 @@ -; This is a more aggressive form of accumulator recursion insertion, which -; requires noticing that X doesn't change as we perform the tailcall. Thanks -; go out to the anonymous users of the demo script for "suggesting" -; optimizations that should be done. 
:) - -; RUN: opt < %s -tailcallelim -S | not grep call - -define i32 @mul(i32 %x, i32 %y) { -entry: - %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif -endif: ; preds = %entry - %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] - %tmp.5 = call i32 @mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] - %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] - ret i32 %tmp.9 -return: ; preds = %entry - ret i32 %x -} - diff --git a/test/Transforms/TailCallElim/switch.ll b/test/Transforms/TailCallElim/switch.ll deleted file mode 100644 index 33884318b0c81..0000000000000 --- a/test/Transforms/TailCallElim/switch.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt %s -tailcallelim -S | FileCheck %s - -define i64 @fib(i64 %n) nounwind readnone { -; CHECK: @fib -entry: -; CHECK: tailrecurse: -; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] -; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] - switch i64 %n, label %bb1 [ -; CHECK: switch i64 %n.tr, label %bb1 [ - i64 0, label %bb2 - i64 1, label %bb2 - ] - -bb1: -; CHECK: bb1: - %0 = add i64 %n, -1 -; CHECK: %0 = add i64 %n.tr, -1 - %1 = tail call i64 @fib(i64 %0) nounwind -; CHECK: %1 = tail call i64 @fib(i64 %0) - %2 = add i64 %n, -2 -; CHECK: %2 = add i64 %n.tr, -2 - %3 = tail call i64 @fib(i64 %2) nounwind -; CHECK-NOT: tail call i64 @fib - %4 = add nsw i64 %3, %1 -; CHECK: add nsw i64 %accumulator.tr, %1 - ret i64 %4 -; CHECK: br label %tailrecurse - -bb2: -; CHECK: bb2: - ret i64 %n -; CHECK: ret i64 %accumulator.tr -} diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll index 88a565684c5df..03e99bc9bf6aa 100644 --- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll +++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate +; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats 
-disable-output |& not grep tailduplicate ; XFAIL: * define i32 @foo(i32 %l) nounwind { diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll new file mode 100644 index 0000000000000..bf5563d9c0514 --- /dev/null +++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll @@ -0,0 +1,21 @@ +; RUN: not llvm-as < %s 2> %t +; RUN: grep {Broken module} %t +; PR7316 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" +target triple = "x86-unknown-unknown" +@aa = global [32 x i8] zeroinitializer, align 1 +@bb = global [16 x i8] zeroinitializer, align 1 +define void @x() nounwind { +L.0: + %0 = getelementptr [32 x i8]* @aa, i32 0, i32 4 + %1 = bitcast i8* %0 to [16 x i8]* + %2 = bitcast [16 x i8]* %1 to [0 x i8]* + %3 = getelementptr [16 x i8]* @bb + %4 = bitcast [16 x i8]* %3 to [0 x i8]* + call void @llvm.memcpy.i32([0 x i8]* %2, [0 x i8]* %4, i32 16, i32 1) + br label %return +return: + ret void +} +declare void @llvm.memcpy.i32([0 x i8]*, [0 x i8]*, i32, i32) nounwind diff --git a/test/lit.cfg b/test/lit.cfg index 5e7e0e4449808..f15777c99912e 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -46,7 +46,16 @@ if llvm_obj_root is not None: config.environment['PATH'] = path # Propogate 'HOME' through the environment. -config.environment['HOME'] = os.environ['HOME'] +if 'HOME' in os.environ: + config.environment['HOME'] = os.environ['HOME'] + +# Propogate 'INCLUDE' through the environment. +if 'INCLUDE' in os.environ: + config.environment['INCLUDE'] = os.environ['INCLUDE'] + +# Propogate 'LIB' through the environment. +if 'LIB' in os.environ: + config.environment['LIB'] = os.environ['LIB'] # Propogate LLVM_SRC_ROOT into the environment. config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '') @@ -110,7 +119,7 @@ import re site_exp = {} # FIXME: Implement lit.site.cfg. 
for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')): - m = re.match('set ([^ ]+) "([^"]*)"', line) + m = re.match('set ([^ ]+) "(.*)"', line) if m: site_exp[m.group(1)] = m.group(2) @@ -147,13 +156,13 @@ def llvm_supports_target(name): def llvm_supports_darwin_and_target(name): return 'darwin' in config.target_triple and llvm_supports_target(name) -langs = set(site_exp['llvmgcc_langs'].split(',')) +langs = set([s.strip() for s in site_exp['llvmgcc_langs'].split(',')]) def llvm_gcc_supports(name): - return name in langs + return name.strip() in langs -bindings = set(site_exp['llvm_bindings'].split(',')) +bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')]) def llvm_supports_binding(name): - return name in bindings + return name.strip() in bindings # Provide on_clone hook for reading 'dg.exp'. import os |