diff options
Diffstat (limited to 'test')
443 files changed, 18712 insertions, 8378 deletions
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll index 5d3f67ebe1ae..7555a4c2a9b0 100644 --- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll +++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll @@ -168,132 +168,132 @@ define void @caller_a(double* %arg_a0, ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0 ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1 ; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1 -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: ===== Alias Analysis Evaluator Report ===== @@ -302,9 +302,9 @@ define void @caller_a(double* %arg_a0, ; CHECK: 36 may alias responses (30.0%) ; CHECK: 0 must alias responses (0.0%) ; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0% -; CHECK: 128 Total ModRef Queries Performed -; CHECK: 44 no mod/ref responses (34.3%) +; CHECK: 184 Total ModRef Queries Performed +; CHECK: 44 no mod/ref responses (23.9%) ; CHECK: 0 mod responses (0.0%) ; CHECK: 0 ref responses (0.0%) -; CHECK: 84 mod & ref responses (65.6%) -; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 34%/0%/0%/65% +; CHECK: 140 mod & ref responses (76.0%) +; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76% diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll index 95f94d096f35..0e0c45c8ad5c 100644 --- a/test/Analysis/BasicAA/constant-over-index.ll +++ b/test/Analysis/BasicAA/constant-over-index.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -aa-eval -print-all-alias-modref-info \ -; RUN: |& grep {MayAlias: double\\* \[%\]p.0.i.0, double\\* \[%\]p3\$} +; RUN: opt < %s -aa-eval -print-all-alias-modref-info |& FileCheck %s ; PR4267 +; CHECK: MayAlias: double* %p.0.i.0, double* %p3 + ; %p3 is equal to %p.0.i.0 on the second iteration of the loop, ; so MayAlias is needed. diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll index 50dc8864ac9b..47d278fab1c2 100644 --- a/test/Analysis/BasicAA/featuretest.ll +++ b/test/Analysis/BasicAA/featuretest.ll @@ -1,17 +1,22 @@ ; This testcase tests for various features the basicaa test should be able to ; determine, as noted in the comments. -; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE +; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @Global = external global { i32 } +declare void @external(i32*) + ; Array test: Test that operations on one local array do not invalidate ; operations on another array. Important for scientific codes. ; define i32 @different_array_test(i64 %A, i64 %B) { %Array1 = alloca i32, i32 100 %Array2 = alloca i32, i32 200 + + call void @external(i32* %Array1) + call void @external(i32* %Array2) %pointer = getelementptr i32* %Array1, i64 %A %val = load i32* %pointer @@ -22,6 +27,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { %REMOVE = load i32* %pointer ; redundant with above load %retval = sub i32 %REMOVE, %val ret i32 %retval +; CHECK: @different_array_test +; CHECK: ret i32 0 } ; Constant index test: Constant indexes into the same array should not @@ -29,6 +36,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { ; define i32 @constant_array_index_test() { %Array = alloca i32, i32 100 + call void @external(i32* %Array) + %P1 = getelementptr i32* %Array, i64 7 %P2 = getelementptr i32* %Array, i64 6 @@ -37,6 +46,8 @@ define i32 @constant_array_index_test() { %BREMOVE = load i32* %P1 %Val = sub i32 %A, %BREMOVE ret i32 %Val +; CHECK: @constant_array_index_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant getelementptr, that @@ -48,6 +59,8 @@ define i32 @gep_distance_test(i32* %A) { %REMOVEv = load i32* %A %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant offset, that they @@ -60,6 +73,8 @@ define i32 @gep_distance_test2({i32,i32}* %A, i64 %distance) { %REMOVEv = load i32* %A1 %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test2 +; CHECK: ret i32 0 } ; Test that we can do funny pointer things and that distance calc will still @@ -68,16 +83,45 @@ define i32 @gep_distance_test3(i32 * %A) { %X = load i32* %A %B = bitcast i32* %A to i8* %C = getelementptr i8* %B, i64 4 - %Y = load i8* %C - ret i32 8 + store i8 42, i8* %C + %Y = load i32* %A + %R = sub i32 %X, %Y + ret i32 %R +; CHECK: @gep_distance_test3 +; CHECK: ret i32 0 } ; Test that we can disambiguate globals reached through constantexpr geps define i32 @constexpr_test() { %X = alloca i32 + call void @external(i32* %X) + %Y = load i32* %X store i32 5, i32* getelementptr ({ i32 }* @Global, i64 0, i32 0) %REMOVE = load i32* %X %retval = sub i32 %Y, %REMOVE ret i32 %retval +; CHECK: @constexpr_test +; CHECK: ret i32 0 +} + + + +; PR7589 +; These two index expressions are different, this cannot be CSE'd. +define i16 @zext_sext_confusion(i16* %row2col, i5 %j) nounwind{ +entry: + %sum5.cast = zext i5 %j to i64 ; <i64> [#uses=1] + %P1 = getelementptr i16* %row2col, i64 %sum5.cast + %row2col.load.1.2 = load i16* %P1, align 1 ; <i16> [#uses=1] + + %sum13.cast31 = sext i5 %j to i6 ; <i6> [#uses=1] + %sum13.cast = zext i6 %sum13.cast31 to i64 ; <i64> [#uses=1] + %P2 = getelementptr i16* %row2col, i64 %sum13.cast + %row2col.load.1.6 = load i16* %P2, align 1 ; <i16> [#uses=1] + + %.ret = sub i16 %row2col.load.1.6, %row2col.load.1.2 ; <i16> [#uses=1] + ret i16 %.ret +; CHECK: @zext_sext_confusion +; CHECK: ret i16 %.ret } diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll index 1ed031224713..eba9599ba07b 100644 --- a/test/Analysis/BasicAA/gep-alias.ll +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -117,12 +117,12 @@ define i32 @test7(i32* %p, i64 %i) { ; P[zext(i)] != p[zext(i+1)] ; PR1143 -define i32 @test8(i32* %p, i32 %i) { - %i1 = zext i32 %i to i64 - %pi = getelementptr i32* %p, i64 %i1 - %i.next = add i32 %i, 1 - %i.next2 = zext i32 %i.next to i64 - %pi.next = getelementptr i32* %p, i64 %i.next2 +define i32 @test8(i32* %p, i16 %i) { + %i1 = zext i16 %i to i32 + %pi = getelementptr i32* %p, i32 %i1 + %i.next = add i16 %i, 1 + %i.next2 = zext i16 %i.next to i32 + %pi.next = getelementptr i32* %p, i32 %i.next2 %x = load i32* %pi store i32 42, i32* %pi.next %y = load i32* %pi diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll new file mode 100644 index 000000000000..12b088b1f651 --- /dev/null +++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s + + +; CHECK: Just Ref: call void @ro() <-> call void @f0() + +declare void @f0() +declare void @ro() readonly + +define void @test0() { + call void @f0() + call void @ro() + ret void +} + +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +@A = external global i8 +@B = external global i8 +define void @test1() { + call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1) + call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1) + ret void +} diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index a2aabf135f6f..b9a3c5e58f68 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -123,3 +123,14 @@ define i32 @test5(i8* %P, i32 %Len) { ; CHECK: sub i32 %tmp, %tmp } +define i8 @test6(i8* %p, i8* noalias %a) { + %x = load i8* %a + %t = va_arg i8* %p, float + %y = load i8* %a + %z = add i8 %x, %y + ret i8 %z +; CHECK: @test6 +; CHECK: load i8* %a +; CHECK-NOT: load +; CHECK: ret +} diff --git a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll new file mode 100644 index 000000000000..218b4375f70c --- /dev/null +++ b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions %s +define i32 @main() nounwind { +entry: + br label %for.cond + +test: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + br i1 true, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + br label %for.cond + +for.end: ; preds = %for.cond + ret i32 0 +} diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll new file mode 100644 index 000000000000..faec45a911f5 --- /dev/null +++ b/test/Analysis/RegionInfo/block_sort.ll @@ -0,0 +1,42 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @BZ2_blockSort() nounwind { +start: + br label %while + +while: + br label %while.body134.i.i + +while.body134.i.i: + br i1 1, label %end, label %w + +w: + br label %if.end140.i.i + +if.end140.i.i: + br i1 1, label %while.end186.i.i, label %if.end183.i.i + +if.end183.i.i: + br label %while.body134.i.i + +while.end186.i.i: + br label %while + +end: + ret void +} +; CHECK-NOT: => +; CHECK: [0] start => <Function Return> +; CHECK: [1] while => end + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: start, while, while.body134.i.i, end, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, +; BBIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, + +; RNIT: start, while => end, end, +; RNIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll new file mode 100644 index 000000000000..2ce57c3c5f37 --- /dev/null +++ b/test/Analysis/RegionInfo/cond_loop.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +5: + br label %"0" + +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + ret void +3: + br i1 1, label %"1", label %"4" +4: + br label %"0" +} + +; CHECK-NOT: => +; CHECK: [0] 5 => <Function Return> +; CHECK: [1] 0 => 2 + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 5, 0, 1, 2, 3, 4, +; BBIT: 0, 1, 3, 4, + +; RNIT: 5, 0 => 2, 2, +; RNIT: 0, 1, 3, 4, diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll new file mode 100644 index 000000000000..7ca5c7c7b537 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated.ll @@ -0,0 +1,60 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +end165: + br i1 1, label %false239, label %true181 + +true181: + br i1 1, label %then187, label %else232 + +then187: + br label %end265 + +else232: + br i1 1, label %false239, label %then245 + +false239: + br i1 1, label %then245, label %else259 + +then245: + br i1 1, label %then251, label %end253 + +then251: + br label %end253 + +end253: + br label %end265 + +else259: + br label %end265 + +end265: + br i1 1, label %then291, label %end298 + +then291: + br label %end298 + +end298: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] end165 => <Function Return> +; CHECK-NEXT: [1] end165 => end265 +; CHECK-NEXT: [2] then245 => end253 +; CHECK-NEXT: [1] end265 => end298 + +; STAT: 4 region - The # of regions + +; BBIT: end165, false239, then245, then251, end253, end265, then291, end298, else259, true181, then187, else232, +; BBIT: end165, false239, then245, then251, end253, else259, true181, then187, else232, +; BBIT: then245, then251, +; BBIT: end265, then291, + +; RNIT: end165 => end265, end265 => end298, end298, +; RNIT: end165, false239, then245 => end253, end253, else259, true181, then187, else232, +; RNIT: then245, then251, +; RNIT: end265, then291, diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll new file mode 100644 index 000000000000..5fa940a61ef6 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -0,0 +1,44 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc void @compress() nounwind { +end33: + br i1 1, label %end124, label %lor.lhs.false95 + +lor.lhs.false95: + br i1 1, label %then107, label %end172 + +then107: + br i1 1, label %end124, label %then113 + +then113: + br label %end124 + +end124: + br label %exit + +end172: + br label %exit + + +exit: + unreachable + + +} +; CHECK-NOT: => +; CHECK: [0] end33 => <Function Return> +; CHECK-NEXT: [1] end33 => exit +; CHECK-NEXT: [2] then107 => end124 + +; STAT: 3 region - The # of regions + +; BBIT: end33, end124, exit, lor.lhs.false95, then107, then113, end172, +; BBIT: end33, end124, lor.lhs.false95, then107, then113, end172, +; BBIT: then107, then113, + +; RNIT: end33 => exit, exit, +; RNIT: end33, end124, lor.lhs.false95, then107 => end124, end172, +; RNIT: then107, then113, diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll new file mode 100644 index 000000000000..098c9b6b4613 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"3" +3: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 + +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, + +; RNIT: 0, 1 => 3, 3, +; RNIT: 1, 2, diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll new file mode 100644 index 000000000000..1b88596c0f8c --- /dev/null +++ b/test/Analysis/RegionInfo/condition_same_exit.ll @@ -0,0 +1,31 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br i1 1, label %"1", label %"4" + +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 4 +; CHECK-NEXT: [2] 1 => 4 +; STAT: 3 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0 => 4, 4, +; RNIT: 0, 1 => 4, +; RNIT: 1, 2, 3, diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll new file mode 100644 index 000000000000..19b154b6476b --- /dev/null +++ b/test/Analysis/RegionInfo/condition_simple.ll @@ -0,0 +1,28 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 4 +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0, 1 => 4, 4, +; RNIT: 1, 2, 3, diff --git a/test/Transforms/ABCD/dg.exp b/test/Analysis/RegionInfo/dg.exp index f2005891a59a..f2005891a59a 100644 --- a/test/Transforms/ABCD/dg.exp +++ b/test/Analysis/RegionInfo/dg.exp diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll new file mode 100644 index 000000000000..3b152d2f565d --- /dev/null +++ b/test/Analysis/RegionInfo/exit_in_condition.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %body.i, label %if.end + +body.i: + br i1 1, label %end, label %if.end + +if.end: + br label %if.then64 + +if.then64: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, body.i, end, if.end, if.then64, +; BBIT: outer, body, body.i, if.end, if.then64, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, body.i, if.end, if.then64, diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll new file mode 100644 index 000000000000..59cead492619 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 4 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll new file mode 100644 index 000000000000..80c69b7ab2e2 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -0,0 +1,36 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 5, 11, 6, 12, 3, 4, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 1 => 3, 3, 4, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll new file mode 100644 index 000000000000..74ceafb84955 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -0,0 +1,52 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 3 +; CHECK-NEXT: [1] 7 => 1 +; STAT: 3 region - The # of regions +; STAT: 2 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 12, 3, 4, 8, 9, 13, 10, 14, +; BBIT: 7, 8, 9, 13, 10, 14, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 7 => 1, 1 => 3, 3, 4, +; RNIT: 7, 8, 9, 13, 10, 14, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll new file mode 100644 index 000000000000..fd56af1d3b8c --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -0,0 +1,48 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br i1 1, label %"2", label %"10" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 7 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, 3, 4, +; BBIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, + +; RNIT: 0, 7 => 3, 3, 4, +; RNIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll new file mode 100644 index 000000000000..d1d68982eec6 --- /dev/null +++ b/test/Analysis/RegionInfo/loop_with_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"6", label %"2" +2: + br i1 1, label %"3", label %"4" +3: + br label %"5" +4: + br label %"5" +5: + br label %"8" +8: + br i1 1, label %"7", label %"9" +9: + br label %"2" +7: + br label %"6" +6: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 6 +; CHECK-NEXT: [2] 2 => 7 +; CHECK-NEXT: [3] 2 => 5 +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 6, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 1, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 2, 3, 5, 8, 9, 4, +; BBIT: 2, 3, 4, + +; RNIT: 0, 1 => 6, 6, +; RNIT: 1, 2 => 7, 7, +; RNIT: 2 => 5, 5, 8, 9, +; RNIT: 2, 3, 4, diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll new file mode 100644 index 000000000000..d4bf3cc50118 --- /dev/null +++ b/test/Analysis/RegionInfo/loops_1.ll @@ -0,0 +1,40 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @loops_1() nounwind { +entry: + br i1 1, label %outer , label %a + +a: + br label %body + +outer: + br label %body + +body: + br i1 1, label %land, label %if + +land: + br i1 1, label %exit, label %end + +exit: + br i1 1, label %if, label %end + +if: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] entry => end +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, land, exit, if, end, a, +; BBIT: entry, outer, body, land, exit, if, a, + +; RNIT: entry => end, end, +; RNIT: entry, outer, body, land, exit, if, a, diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll new file mode 100644 index 000000000000..07aa7c311010 --- /dev/null +++ b/test/Analysis/RegionInfo/loops_2.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @meread_() nounwind { +entry: + br label %bb23 + +bb23: + br label %bb.i + +bb.i: ; preds = %bb.i, %bb54 + br label %pflini_.exit + +pflini_.exit: ; preds = %bb.i + br label %bb58thread-split + +bb58thread-split: ; preds = %bb64, %bb61, %pflini_.exit + br label %bb58 + +bb58: ; preds = %bb60, %bb58thread-split + br i1 1, label %bb59, label %bb23 + +bb59: ; preds = %bb58 + switch i32 1, label %bb60 [ + i32 1, label %l98 + ] + +bb60: ; preds = %bb59 + br i1 1, label %bb61, label %bb58 + +bb61: ; preds = %bb60 + br label %bb58thread-split + +l98: ; preds = %bb69, %bb59 + ret void +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK: [1] bb23 => l98 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, l98, +; BBIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, + +; RNIT: entry, bb23 => l98, l98, +; RNIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll new file mode 100644 index 000000000000..829c157c2c68 --- /dev/null +++ b/test/Analysis/RegionInfo/mix_1.ll @@ -0,0 +1,69 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + + br i1 1, label %"1", label %"15" +1: + switch i32 0, label %"2" [ i32 0, label %"3" + i32 1, label %"7"] +2: + br label %"4" +3: + br label %"5" +4: + br label %"6" +5: + br label %"6" +6: + br label %"7" +7: + br label %"15" +15: + br label %"8" +8: + br label %"16" +16: + br label %"9" +9: + br i1 1, label %"10", label %"11" +11: + br i1 1, label %"13", label %"12" +13: + br label %"14" +12: + br label %"14" +14: + br label %"8" +10: + br label %"17" +17: + br label %"18" +18: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 15 +; CHECK-NEXT: [2] 1 => 7 +; CHECK-NEXT: [1] 8 => 10 +; CHECK-NEXT: [2] 11 => 14 +; STAT: 5 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 4, 6, 7, 15, 8, 16, 9, 10, 17, 18, 11, 13, 14, 12, 3, 5, +; BBIT: 0, 1, 2, 4, 6, 7, 3, 5, +; BBIT: 1, 2, 4, 6, 3, 5, +; BBIT: 8, 16, 9, 11, 13, 14, 12, +; BBIT: 11, 13, 12, + +; RNIT: 0 => 15, 15, 8 => 10, 10, 17, 18, +; RNIT: 0, 1 => 7, 7, +; RNIT: 1, 2, 4, 6, 3, 5, +; RNIT: 8, 16, 9, 11 => 14, 14, +; RNIT: 11, 13, 12, diff --git a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll new file mode 100644 index 000000000000..7bc0e4607d68 --- /dev/null +++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition_0() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb42 [ + i32 67, label %bb42 + i32 90, label %bb41 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb42, bb41, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, + +define void @normal_condition_1() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb41 [ + i32 67, label %bb42 + i32 90, label %bb42 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb41, bb42, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll new file mode 100644 index 000000000000..9d8c4558f049 --- /dev/null +++ b/test/Analysis/RegionInfo/nested_loops.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %exit172, label %end + +exit172: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end + +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, exit172, end, +; BBIT: outer, body, exit172, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, exit172, diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll new file mode 100644 index 000000000000..d986387099c3 --- /dev/null +++ b/test/Analysis/RegionInfo/next.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @MAIN__() nounwind { +entry: + br label %__label_002001.outer + +__label_002001.outer: ; preds = %bb236, %bb92 + br label %__label_002001 + +__label_002001: ; preds = %bb229, %__label_002001.outer + br i1 1, label %bb93, label %__label_000020 + +bb93: ; preds = %__label_002001 + br i1 1, label %__label_000020, label %bb197 + +bb197: ; preds = %bb193 + br i1 1, label %bb229, label %bb224 + +bb224: ; preds = %bb223, %bb227 + br i1 1, label %bb229, label %bb224 + +bb229: ; preds = %bb227, %bb223 + br i1 1, label %__label_002001, label %__label_002001.outer + +__label_000020: ; preds = %__label_002001, %bb194 + ret void +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] __label_002001.outer => __label_000020 +; CHECK-NEXT; [2] bb197 => bb229 +; CHECK-NEXT; [3] bb224 => bb229 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, __label_002001.outer, __label_002001, bb93, __label_000020, bb197, bb229, bb224, +; BBIT: __label_002001.outer, __label_002001, bb93, bb197, bb229, bb224, +; BBIT: bb197, bb224, +; BBIT: bb224, + +; RNIT: entry, __label_002001.outer => __label_000020, __label_000020, +; RNIT: __label_002001.outer, __label_002001, bb93, bb197 => bb229, bb229, +; RNIT: bb197, bb224 => bb229, +; RNIT: bb224, diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll new file mode 100644 index 000000000000..00b544bc6919 --- /dev/null +++ b/test/Analysis/RegionInfo/paper.ll @@ -0,0 +1,55 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + br label %"1" +1: + br label %"2" +2: + br label %"3" +3: + br i1 1, label %"13", label %"4" +4: + br i1 1, label %"5", label %"1" +5: + br i1 1, label %"8", label %"6" +6: + br i1 1, label %"7", label %"4" +7: + ret void +8: + br i1 1, label %"9", label %"1" +9: + br label %"10" +10: + br i1 1, label %"12", label %"11" +11: + br i1 1, label %"9", label %"8" +13: + br i1 1, label %"2", label %"1" +12: + switch i32 0, label %"1" [ i32 0, label %"9" + i32 1, label %"8"] +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 7 +; CHECK-NEXT: [2] 1 => 4 +; CHECK-NEXT: [2] 8 => 1 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, 7, +; BBIT: 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, +; BBIT: 1, 2, 3, 13, +; BBIT: 8, 9, 10, 12, 11, + +; RNIT: 0, 1 => 7, 7, +; RNIT: 1 => 4, 4, 5, 8 => 1, 6, +; RNIT: 1, 2, 3, 13, +; RNIT: 8, 9, 10, 12, 11, diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll new file mode 100644 index 000000000000..a97182b81a24 --- /dev/null +++ b/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %else, label %true77 + +true77: + br i1 1, label %then83, label %else + +then83: + br label %outer + +else: + br label %else106 + +else106: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; CHECK-NEXT: [2] outer => else + +; STAT: 3 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, else, else106, end, true77, then83, +; BBIT: outer, body, else, else106, true77, then83, +; BBIT: outer, body, true77, then83, + +; RNIT: entry, outer => end, end, +; RNIT: outer => else, else, else106, +; RNIT: outer, body, true77, then83, diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index 0bc9ce8241a8..89e8b983c0c0 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -indvars -S > %t ; RUN: grep select %t | count 2 -; RUN: grep {icmp ne i32.\* %w } %t +; RUN: grep {icmp ne i32.\* } %t ; Indvars should be able to insert a canonical induction variable ; for the bb6 loop without using a maximum calculation (icmp, select) diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index a8966be4ccd4..843fb073087c 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,8 +1,9 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>} +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; ScalarEvolution should be able to understand the loop and eliminate the casts. +; CHECK: {%d,+,sizeof(i32)} + define void @foo(i32* nocapture %d, i32 %n) nounwind { entry: %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] @@ -32,3 +33,40 @@ bb1.return_crit_edge: ; preds = %bb1 return: ; preds = %bb1.return_crit_edge, %entry ret void } + +; ScalarEvolution should be able to find the maximum tripcount +; of this multiple-exit loop, and if it doesn't know the exact +; count, it should say so. + +; PR7845 +; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: max backedge-taken count is 5 + +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; <i32> [#uses=5] + %cmp = icmp slt i32 %g_4.0, 5 ; <i1> [#uses=1] + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = trunc i32 %g_4.0 to i16 ; <i16> [#uses=1] + %tobool.not = icmp eq i16 %conv, 0 ; <i1> [#uses=1] + %tobool3 = icmp ne i32 %g_4.0, 0 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.not, %tobool3 ; <i1> [#uses=1] + br i1 %or.cond, label %for.end, label %for.inc + +for.inc: ; preds = %for.body + %add = add nsw i32 %g_4.0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.body, %for.cond + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/Archive/README.txt b/test/Archive/README.txt index da6cfa4c9ed0..6810befc5857 100644 --- a/test/Archive/README.txt +++ b/test/Archive/README.txt @@ -5,7 +5,7 @@ This directory contains various tests of llvm-ar and llvm-ranlib to ensure compatibility reading other ar(1) formats. It also provides a basic functionality test for these tools. -There are four archives stored in CVS with these tests: +There are four archives accompanying these tests: GNU.a - constructed on Linux with GNU ar MacOSX.a - constructed on Mac OS X with its native BSD4.4 ar diff --git a/test/Assembler/2010-01-06-UnionType.ll b/test/Assembler/2010-01-06-UnionType.ll deleted file mode 100644 index 37130d66088d..000000000000 --- a/test/Assembler/2010-01-06-UnionType.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llvm-as %s -o /dev/null - -%X = type union { i32, i32* } diff --git a/test/Assembler/align-inst-alloca.ll b/test/Assembler/align-inst-alloca.ll new file mode 100644 index 000000000000..0343bebf1876 --- /dev/null +++ b/test/Assembler/align-inst-alloca.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + %p = alloca i1, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-load.ll b/test/Assembler/align-inst-load.ll new file mode 100644 index 000000000000..3586be2d6e03 --- /dev/null +++ b/test/Assembler/align-inst-load.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + load i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-store.ll b/test/Assembler/align-inst-store.ll new file mode 100644 index 000000000000..8c3b7124b437 --- /dev/null +++ b/test/Assembler/align-inst-store.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + store i1 false, i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst.ll b/test/Assembler/align-inst.ll new file mode 100644 index 000000000000..6f7100e065d3 --- /dev/null +++ b/test/Assembler/align-inst.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as %s -o /dev/null + +@A = global i1 0, align 536870912 + +define void @foo() { + %p = alloca i1, align 536870912 + load i1* %p, align 536870912 + store i1 false, i1* %p, align 536870912 + ret void +} diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll new file mode 100644 index 000000000000..fe23d26fbeb4 --- /dev/null +++ b/test/Assembler/comment.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as < %s | llvm-dis -show-annotations | FileCheck -check-prefix=ANNOT %s +; RUN: llvm-as < %s | llvm-dis | FileCheck -check-prefix=BARE %s + +; The bare version of this file should not have any #uses lines. +; BARE: @B = +; BARE-NOT: #uses +; BARE: } + +@B = external global i32 +; ANNOT: @B = external global i32 ; [#uses=0] + +define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + ret <4 x i1> %cmp +} + +; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + + diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll index 803d6d343063..ebef58ff459c 100644 --- a/test/Assembler/getelementptr.ll +++ b/test/Assembler/getelementptr.ll @@ -3,9 +3,9 @@ ; Verify that over-indexed getelementptrs are folded. @A = external global [2 x [3 x [5 x [7 x i32]]]] @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523) -; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; <i32**> [#uses=0] +; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523) -; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; <i32**> [#uses=0] +; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ;; Verify that i16 indices work. @x = external global {i32, i32} diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml index e830106c11ff..bf2178254409 100644 --- a/test/Bindings/Ocaml/analysis.ml +++ b/test/Bindings/Ocaml/analysis.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml index 112ca618301a..30b07d2199f3 100644 --- a/test/Bindings/Ocaml/bitreader.ml +++ b/test/Bindings/Ocaml/bitreader.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml index ef1c9ab722c8..8eb923ea32c7 100644 --- a/test/Bindings/Ocaml/bitwriter.ml +++ b/test/Bindings/Ocaml/bitwriter.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml index 2caeb82aac22..63040e4a33fc 100644 --- a/test/Bindings/Ocaml/executionengine.ml +++ b/test/Bindings/Ocaml/executionengine.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml index f28eff28da75..8a6af012ac24 100644 --- a/test/Bindings/Ocaml/scalar_opts.ml +++ b/test/Bindings/Ocaml/scalar_opts.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -13,8 +14,11 @@ let context = global_context () let void_type = Llvm.void_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml index 3c3b7339fef8..bfaf37ca879d 100644 --- a/test/Bindings/Ocaml/target.ml +++ b/test/Bindings/Ocaml/target.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -8,13 +9,17 @@ open Llvm open Llvm_target + let context = global_context () let i32_type = Llvm.i32_type context let i64_type = Llvm.i64_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index 506bf50e2a49..e55ab9643e43 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc > %t.ll *) @@ -296,12 +296,6 @@ let test_constants () = insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |]) = (type_of c)); - group "union"; - let t = union_type context [| i1_type; i16_type; i64_type; double_type |] in - let c = const_union t one in - ignore (define_global "const_union" c m); - insist (t = (type_of c)); - (* RUN: grep {const_null.*zeroinit} < %t.ll *) group "null"; @@ -436,7 +430,7 @@ let test_constants () = * RUN: grep {const_select.*select} < %t.ll * RUN: grep {const_extractelement.*extractelement} < %t.ll * RUN: grep {const_insertelement.*insertelement} < %t.ll - * RUN: grep {const_shufflevector.*shufflevector} < %t.ll + * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll *) ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m); ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m); @@ -455,7 +449,8 @@ let test_constants () = ignore (define_global "const_shufflevector" (const_shufflevector (const_vector [| zero; one |]) (const_vector [| one; zero |]) - (const_bitcast foldbomb (vector_type i32_type 2))) m); + (const_vector [| const_int i32_type 0; const_int i32_type 1; + const_int i32_type 2; const_int i32_type 3 |])) m); group "asm"; begin let ft = function_type void_type [| i32_type; i32_type; i32_type |] in @@ -642,11 +637,18 @@ let test_users () = let p1 = param fn 0 in let p2 = param fn 1 in + let a3 = build_alloca i32_type "user_alloca" b in + let p3 = build_load a3 "user_load" b in let i = build_add p1 p2 "sum" b in + insist ((num_operands i) = 2); insist ((operand i 0) = p1); insist ((operand i 1) = p2); + set_operand i 1 p3; + insist ((operand i 1) != p2); + insist ((operand i 1) = p3); + ignore (build_unreachable b) @@ -1154,13 +1156,13 @@ let test_builder () = group "comparisons"; begin (* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll * RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll - * RUN: grep {%build_icmp_false = fcmp false float %F1, %F2} < %t.ll - * RUN: grep {%build_icmp_true = fcmp true float %F2, %F1} < %t.ll + * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll + * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); ignore (build_icmp Icmp.Sle p2 p1 "build_icmp_sle" atentry); - ignore (build_fcmp Fcmp.False f1 f2 "build_icmp_false" atentry); - ignore (build_fcmp Fcmp.True f2 f1 "build_icmp_true" atentry) + ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry); + ignore (build_fcmp Fcmp.True f2 f1 "build_fcmp_true" atentry) end; group "miscellaneous"; begin @@ -1229,13 +1231,19 @@ let test_builder () = group "dbg"; begin (* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll - * RUN: grep {!1 = metadata !\{i32 2, metadata !"dbg test"\}} < %t.ll + * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll *) - let m1 = const_int i32_type 2 in - let m2 = mdstring context "dbg test" in - let md = mdnode context [| m1; m2 |] in + insist ((current_debug_location atentry) = None); + + let m_line = const_int i32_type 2 in + let m_col = const_int i32_type 3 in + let m_scope = mdnode context [| |] in + let m_inlined = mdnode context [| |] in + let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in set_current_debug_location atentry md; + insist ((current_debug_location atentry) = Some md); + let i = build_add p1 p2 "dbg" atentry in insist ((has_metadata i) = true); diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll new file mode 100644 index 000000000000..8a8767337dca --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll @@ -0,0 +1,3 @@ +; This isn't really an assembly file. It just runs test on bitcode to ensure +; it is auto-upgraded. +; RUN: llvm-dis < %s.bc | not grep {i32 @\\.llvm\\.eh} diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc Binary files differnew file mode 100644 index 000000000000..1abe9688e291 --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll.bc diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll new file mode 100644 index 000000000000..272cd424e2a2 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll @@ -0,0 +1,213 @@ +; RUN: llvm-dis < %s.bc | FileCheck %s + +; vmovls should be auto-upgraded to sext + +; CHECK: vmovls8 +; CHECK-NOT: arm.neon.vmovls.v8i16 +; CHECK: sext <8 x i8> + +; CHECK: vmovls16 +; CHECK-NOT: arm.neon.vmovls.v4i32 +; CHECK: sext <4 x i16> + +; CHECK: vmovls32 +; CHECK-NOT: arm.neon.vmovls.v2i64 +; CHECK: sext <2 x i32> + +; vmovlu should be auto-upgraded to zext + +; CHECK: vmovlu8 +; CHECK-NOT: arm.neon.vmovlu.v8i16 +; CHECK: zext <8 x i8> + +; CHECK: vmovlu16 +; CHECK-NOT: arm.neon.vmovlu.v4i32 +; CHECK: zext <4 x i16> + +; CHECK: vmovlu32 +; CHECK-NOT: arm.neon.vmovlu.v2i64 +; CHECK: zext <2 x i32> + +; vaddl/vaddw should be auto-upgraded to add with sext/zext + +; CHECK: vaddls16 +; CHECK-NOT: arm.neon.vaddls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vaddlu32 +; CHECK-NOT: arm.neon.vaddlu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vaddws8 +; CHECK-NOT: arm.neon.vaddws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: add <8 x i16> + +; CHECK: vaddwu16 +; CHECK-NOT: arm.neon.vaddwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; vsubl/vsubw should be auto-upgraded to subtract with sext/zext + +; CHECK: vsubls16 +; CHECK-NOT: arm.neon.vsubls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vsublu32 +; CHECK-NOT: arm.neon.vsublu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: sub <2 x i64> + +; CHECK: vsubws8 +; CHECK-NOT: arm.neon.vsubws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sub <8 x i16> + +; CHECK: vsubwu16 +; CHECK-NOT: arm.neon.vsubwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; vmull should be auto-upgraded to multiply with sext/zext +; (but vmullp should remain an intrinsic) + +; CHECK: vmulls8 +; CHECK-NOT: arm.neon.vmulls.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sext <8 x i8> +; CHECK-NEXT: mul <8 x i16> + +; CHECK: vmullu16 +; CHECK-NOT: arm.neon.vmullu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: mul <4 x i32> + +; CHECK: vmullp8 +; CHECK: arm.neon.vmullp.v8i16 + +; vmlal should be auto-upgraded to multiply/add with sext/zext + +; CHECK: vmlals32 +; CHECK-NOT: arm.neon.vmlals.v2i64 +; CHECK: sext <2 x i32> +; CHECK-NEXT: sext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vmlalu8 +; CHECK-NOT: arm.neon.vmlalu.v8i16 +; CHECK: zext <8 x i8> +; CHECK-NEXT: zext <8 x i8> +; CHECK-NEXT: mul <8 x i16> +; CHECK-NEXT: add <8 x i16> + +; vmlsl should be auto-upgraded to multiply/sub with sext/zext + +; CHECK: vmlsls16 +; CHECK-NOT: arm.neon.vmlsls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: mul <4 x i32> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vmlslu32 +; CHECK-NOT: arm.neon.vmlslu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: sub <2 x i64> + +; vaba should be auto-upgraded to vabd + add + +; CHECK: vabas32 +; CHECK-NOT: arm.neon.vabas.v2i32 +; CHECK: arm.neon.vabds.v2i32 +; CHECK-NEXT: add <2 x i32> + +; CHECK: vabaQu8 +; CHECK-NOT: arm.neon.vabau.v16i8 +; CHECK: arm.neon.vabdu.v16i8 +; CHECK-NEXT: add <16 x i8> + +; vabal should be auto-upgraded to vabd with zext + add + +; CHECK: vabals16 +; CHECK-NOT: arm.neon.vabals.v4i32 +; CHECK: arm.neon.vabds.v4i16 +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vabalu32 +; CHECK-NOT: arm.neon.vabalu.v2i64 +; CHECK: arm.neon.vabdu.v2i32 +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; vabdl should be auto-upgraded to vabd with zext + +; CHECK: vabdls8 +; CHECK-NOT: arm.neon.vabdls.v8i16 +; CHECK: arm.neon.vabds.v8i8 +; CHECK-NEXT: zext <8 x i8> + +; CHECK: vabdlu16 +; CHECK-NOT: arm.neon.vabdlu.v4i32 +; CHECK: arm.neon.vabdu.v4i16 +; CHECK-NEXT: zext <4 x i16> + +; vmovn should be auto-upgraded to trunc + +; CHECK: vmovni16 +; CHECK-NOT: arm.neon.vmovn.v8i8 +; CHECK: trunc <8 x i16> + +; CHECK: vmovni32 +; CHECK-NOT: arm.neon.vmovn.v4i16 +; CHECK: trunc <4 x i32> + +; CHECK: vmovni64 +; CHECK-NOT: arm.neon.vmovn.v2i32 +; CHECK: trunc <2 x i64> + +; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1) + +; CHECK: vld1i8 +; CHECK: i32 1 +; CHECK: vld2Qi16 +; CHECK: i32 1 +; CHECK: vld3i32 +; CHECK: i32 1 +; CHECK: vld4Qf +; CHECK: i32 1 + +; CHECK: vst1i8 +; CHECK: i32 1 +; CHECK: vst2Qi16 +; CHECK: i32 1 +; CHECK: vst3i32 +; CHECK: i32 1 +; CHECK: vst4Qf +; CHECK: i32 1 + +; CHECK: vld2laneQi16 +; CHECK: i32 1 +; CHECK: vld3lanei32 +; CHECK: i32 1 +; CHECK: vld4laneQf +; CHECK: i32 1 + +; CHECK: vst2laneQi16 +; CHECK: i32 1 +; CHECK: vst3lanei32 +; CHECK: i32 1 +; CHECK: vst4laneQf +; CHECK: i32 1 diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc Binary files differnew file mode 100644 index 000000000000..cabc3c934136 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll.bc diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index 6ad09d2e25cd..18a31eb45d36 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -1,6 +1,8 @@ ; Test that bugpoint can narrow down the testcase to the important function +; FIXME: This likely fails on windows ; -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; XFAIL: mingw define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll new file mode 100644 index 000000000000..f2541ee3f9ac --- /dev/null +++ b/test/BugPoint/metadata.ll @@ -0,0 +1,35 @@ +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw + +; Bugpoint should keep the call's metadata attached to the call. + +; CHECK: call void @foo(), !dbg !0, !attach !2 +; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1} +; CHECK: !1 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +; CHECK: !2 = metadata !{metadata !"the call to foo"} + +%rust_task = type {} +define void @test(i32* %a, i8* %b) { + %s = mul i8 22, 9, !attach !0, !dbg !10 + store i8 %s, i8* %b, !attach !1, !dbg !11 + call void @foo(), !attach !2, !dbg !12 + store i32 7, i32* %a, !attach !3, !dbg !13 + %t = add i32 0, 5, !attach !4, !dbg !14 + ret void +} + +declare void @foo() + +!0 = metadata !{metadata !"boring"} +!1 = metadata !{metadata !"uninteresting"} +!2 = metadata !{metadata !"the call to foo"} +!3 = metadata !{metadata !"noise"} +!4 = metadata !{metadata !"filler"} + +!9 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +!10 = metadata !{i32 100, i32 101, metadata !9, metadata !9} +!11 = metadata !{i32 102, i32 103, metadata !9, metadata !9} +!12 = metadata !{i32 104, i32 105, metadata !9, metadata !9} +!13 = metadata !{i32 106, i32 107, metadata !9, metadata !9} +!14 = metadata !{i32 108, i32 109, metadata !9, metadata !9} diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 439ea545468e..791ec69a23d2 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,5 +1,7 @@ -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes +; FIXME: This likely fails on windows +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw ; Test to make sure that arguments are removed from the function if they are ; unnecessary. And clean up any types that that frees up too. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 433af900dd2b..ad9a2432dbfa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,6 +24,23 @@ endif() include(FindPythonInterp) if(PYTHONINTERP_FOUND) + get_directory_property(DEFINITIONS COMPILE_DEFINITIONS) + foreach(DEF ${DEFINITIONS}) + set(DEFS "${DEFS} -D${DEF}") + endforeach() + get_directory_property(INC_DIRS INCLUDE_DIRECTORIES) + foreach(INC_DIR ${INC_DIRS}) + set(IDIRS "${IDIRS} -I${INC_DIR}") + endforeach() + string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + string(REPLACE "<DEFINES>" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "<FLAGS>" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}") + if(NOT MSVC) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++") + endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in ${CMAKE_CURRENT_BINARY_DIR}/site.exp) diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 81483cb4e7c5..ee63656b26d3 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] +; CHECK: dct_luma_sp: define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { entry: +; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK-NOT: sub sp +; CHECK: vstmdb sp! %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] br label %cond_next489 diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll index d741112e2886..76fa3649c880 100644 --- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll @@ -2,7 +2,7 @@ ; PR1266 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] } %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll index 030486a7c983..7ba2a190be73 100644 --- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll @@ -2,7 +2,7 @@ ; PR1424 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 } diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll index 198faebbea6f..f89a5de77b3f 100644 --- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll +++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll @@ -17,3 +17,17 @@ entry: store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16 ret void } + +; Radar 8290937: Ignore undef shuffle indices. +; CHECK: t2 +; CHECK: vtrn.16 +define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %b.0) nounwind { +entry: + %0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> + %1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0 + store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16 + %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0 + store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16 + ret void +} diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index ff60fa8c49d8..e47c03839375 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -5,32 +5,32 @@ %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] - %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] - %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] - %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd) + call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] @@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) ret <8 x i8> %tmp4 } diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index ce959d1b91c8..cd1c9c8c0421 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -36,8 +36,8 @@ entry: %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19) + tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index e4f20990bed2..6f487962310f 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -12,8 +12,8 @@ entry: %tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1] %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i) nounwind + tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index 7650d883d7b1..ac8e80904eda 100755 --- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s ; PR 7433 +; XFAIL: * %0 = type { i8*, i8* } %1 = type { i8*, i8*, i8* } diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 0c5b180cf846..ffc47ebdf196 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -16,10 +16,10 @@ target triple = "thumbv7-apple-darwin10" define i32 @test(i8* %arg) nounwind { entry: - %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg) + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2> store <2 x i64> %1, <2 x i64>* undef, align 16 ret i32 undef } -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll new file mode 100644 index 000000000000..c03c81545946 --- /dev/null +++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll @@ -0,0 +1,95 @@ +; RUN: llc -enable-correct-eh-support < %s +; PR7716 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +%0 = type { i8*, i8* } +%struct.A = type { i32 } + +@d = internal global i32 0, align 4 ; <i32*> [#uses=6] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1] +@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1] +@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1] +@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1] +@.str5 = private constant [8 x i8] c"A() %d\0A\00" ; <[8 x i8]*> [#uses=1] +@str = internal constant [14 x i8] c"Throwing 1...\00" ; <[14 x i8]*> [#uses=1] +@str1 = internal constant [8 x i8] c"Caught.\00" ; <[8 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i8* @__cxa_allocate_exception(i32) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare void @_Unwind_SjLj_Resume(i8*) + +define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 { +entry: + %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1] + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0] + %tmp3.i = load i32* @d ; <i32> [#uses=1] + %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1] + store i32 %inc.i, i32* @d + ret void +} + +declare void @__cxa_throw(i8*, i8*, i8*) + +define i32 @main() ssp { +entry: + %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0] + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2] + %tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp2.i.i.i + %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0] + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn + to label %.noexc unwind label %lpad + +.noexc: ; preds = %entry + unreachable + +try.cont: ; preds = %lpad + %0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0] + %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0] + %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0] + %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %tmp3.i.i = load i32* @d ; <i32> [#uses=1] + %inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1] + store i32 %inc.i.i4, i32* @d + tail call void @__cxa_end_catch() + %tmp13 = load i32* @d ; <i32> [#uses=1] + %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0] + %tmp16 = load i32* @d ; <i32> [#uses=1] + %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1] + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv + +lpad: ; preds = %entry + %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4] + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1] + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1] + %3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1] + br i1 %3, label %try.cont, label %eh.resume + +eh.resume: ; preds = %lpad + tail call void @_Unwind_SjLj_Resume(i8* %exn) noreturn + unreachable +} + +declare i8* @__cxa_get_exception_ptr(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll new file mode 100644 index 000000000000..f57b7e676949 --- /dev/null +++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 +; <rdar://problem/8264008> + +define linkonce_odr arm_apcscc void @func1() { +entry: + %save_filt.936 = alloca i32 ; <i32*> [#uses=2] + %save_eptr.935 = alloca i8* ; <i8**> [#uses=2] + %eh_exception = alloca i8* ; <i8**> [#uses=5] + %eh_selector = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call arm_apcscc void @func2() + br label %return + +bb: ; No predecessors! + %eh_select = load i32* %eh_selector ; <i32> [#uses=1] + store i32 %eh_select, i32* %save_filt.936, align 4 + %eh_value = load i8** %eh_exception ; <i8*> [#uses=1] + store i8* %eh_value, i8** %save_eptr.935, align 4 + invoke arm_apcscc void @func3() + to label %invcont unwind label %lpad + +invcont: ; preds = %bb + %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] + store i8* %tmp6, i8** %eh_exception, align 4 + %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1] + store i32 %tmp7, i32* %eh_selector, align 4 + br label %Unwind + +bb12: ; preds = %ppad + call arm_apcscc void @_ZSt9terminatev() noreturn nounwind + unreachable + +return: ; preds = %entry + ret void + +lpad: ; preds = %bb + %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1) + store i32 %eh_select14, i32* %eh_selector + br label %ppad + +ppad: + br label %bb12 + +Unwind: + %eh_ptr15 = load i8** %eh_exception + call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15) + unreachable +} + +declare arm_apcscc void @func2() + +declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) + +declare arm_apcscc void @func3() + +declare arm_apcscc i32 @__gxx_personality_sj0(...) diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index cc718399ea96..bb7853e66ef4 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -1,11 +1,43 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN -define i32 @f(i32 %a, i64 %b) { +define i32 @f1(i32 %a, i64 %b) { +; ELF: f1: ; ELF: mov r0, r2 +; DARWIN: f1: ; DARWIN: mov r0, r1 - %tmp = call i32 @g(i64 %b) + %tmp = call i32 @g1(i64 %b) ret i32 %tmp } -declare i32 @g(i64) +; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi. +define i32 @f2() nounwind optsize { +; ELF: f2: +; ELF: mov r0, #128 +; ELF: str r0, [sp] +; DARWIN: f2: +; DARWIN: mov r3, #128 +entry: + %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1] + %not. = icmp ne i32 %0, 128 ; <i1> [#uses=1] + %.0 = zext i1 %not. to i32 ; <i32> [#uses=1] + ret i32 %.0 +} + +; test that on gnueabi a 64 bit value at this position will cause r3 to go +; unused and the value stored in [sp] +; ELF: f3: +; ELF: ldr r0, [sp] +; ELF-NEXT: mov pc, lr +; DARWIN: f3: +; DARWIN: mov r0, r3 +; DARWIN-NEXT: mov pc, lr +define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) { +entry: + %0 = trunc i64 %l to i32 + ret i32 %0 +} + +declare i32 @g1(i64) + +declare i32 @g2(i32 %i, ...) diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll new file mode 100644 index 000000000000..59e2b43a9172 --- /dev/null +++ b/test/CodeGen/ARM/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: mov r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: mov r1, r1, lsr #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: mov r2, r0, lsr #7 +; CHECK: mov r0, r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f1269d5bd2be..db5afe3f56cb 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\ ; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll new file mode 100644 index 000000000000..25c556889fc4 --- /dev/null +++ b/test/CodeGen/ARM/code-placement.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; PHI elimination shouldn't break backedge. +; rdar://8263994 + +%struct.list_data_s = type { i16, i16 } +%struct.list_head = type { %struct.list_head*, %struct.list_data_s* } + +define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind { +entry: + %0 = icmp eq %struct.list_head* %list, null + br i1 %0, label %bb2, label %bb + +bb: +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 +; CHECK: bx lr + %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] + %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] + %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0 + %2 = load %struct.list_head** %1, align 4 + store %struct.list_head* %next.04, %struct.list_head** %1, align 4 + %3 = icmp eq %struct.list_head* %2, null + br i1 %3, label %bb2, label %bb + +bb2: + %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ] + ret %struct.list_head* %next.0.lcssa +} diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index d833afa55583..448b437ddf46 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,13 +1,9 @@ ; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM -; RUN: llc < %s -march=arm -mcpu=cortex-m3 \ -; RUN: | FileCheck %s -check-prefix=CHECK-ARMV7M define i32 @f1(i32 %a, i32 %b) { entry: ; CHECK-ARM: f1 ; CHECK-ARM: __divsi3 -; CHECK-ARMV7M: f1 -; CHECK-ARMV7M: sdiv %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -16,8 +12,6 @@ define i32 @f2(i32 %a, i32 %b) { entry: ; CHECK-ARM: f2 ; CHECK-ARM: __udivsi3 -; CHECK-ARMV7M: f2 -; CHECK-ARMV7M: udiv %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -26,8 +20,6 @@ define i32 @f3(i32 %a, i32 %b) { entry: ; CHECK-ARM: f3 ; CHECK-ARM: __modsi3 -; CHECK-ARMV7M: f3 -; CHECK-ARMV7M: sdiv %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -36,8 +28,6 @@ define i32 @f4(i32 %a, i32 %b) { entry: ; CHECK-ARM: f4 ; CHECK-ARM: __umodsi3 -; CHECK-ARMV7M: f4 -; CHECK-ARMV7M: udiv %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll new file mode 100644 index 000000000000..3bee84d84de4 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin + +; Very basic fast-isel functionality. + +define i32 @add(i32 %a, i32 %b) nounwind ssp { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr + store i32 %b, i32* %b.addr + %tmp = load i32* %a.addr + %tmp1 = load i32* %b.addr + %add = add nsw i32 %tmp, %tmp1 + ret i32 %add +} + +define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + br label %fast + +fast: + %t0 = add i32 %r, %s + %t1 = mul i32 %t0, %s + %t2 = sub i32 %t1, %s + %t3 = and i32 %t2, %s + %t4 = xor i32 %t3, 3 + %t5 = xor i32 %t4, %s + %t6 = add i32 %t5, 2 + %t7 = getelementptr i32* %y, i32 1 + %t8 = getelementptr i32* %t7, i32 %t6 + br label %exit + +exit: + ret i32* %t8 +} diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll index efd87d2dcb89..3223885feda9 100644 --- a/test/CodeGen/ARM/fnmuls.ll +++ b/test/CodeGen/ARM/fnmuls.ll @@ -1,20 +1,18 @@ -; XFAIL: * ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -define float @test1(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fsub float -0.0, %0 ret float %1 } -define float @test2(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +define arm_aapcs_vfpcc float @test2(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fmul float -1.0, %0 diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 6875288304be..64350591b87f 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s ; rdar://7461510 diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll index 7f9d62a9e945..561463720c80 100644 --- a/test/CodeGen/ARM/fpowi.ll +++ b/test/CodeGen/ARM/fpowi.ll @@ -3,7 +3,7 @@ ; ModuleID = '<stdin>' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" define double @_ZSt3powdi(double %__x, i32 %__i) { entry: diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 688b7bc312c7..1ec4d15f6672 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,8 +24,7 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, asr r2 %a = ashr i64 %x, %y @@ -37,8 +36,7 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, lsr r2 %a = lshr i64 %x, %y diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 25cf1356d61c..866be423c2cb 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s0, [r9, #-128] -; CHECK: vstr.32 s0, [r9, #-96] -; CHECK: vstr.32 s0, [r9, #-64] -; CHECK: vstr.32 s0, [r9, #-32] -; CHECK: vstr.32 s0, [r9] -; CHECK: vstr.32 s0, [r9, #32] -; CHECK: vstr.32 s0, [r9, #64] -; CHECK: vstr.32 s0, [r9, #96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" @@ -628,8 +628,7 @@ bb24: ; preds = %bb23 ; CHECK: @ %bb24 ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1 -; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0 +; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1 ; CHECK-NEXT: bne.w %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1] diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll index 1e2e7aa0c8ff..4905dc28cf48 100644 --- a/test/CodeGen/ARM/pack.ll +++ b/test/CodeGen/ARM/pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=arm -mattr=+v6 | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 89b657797f2a..2e4f10d8a63d 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -23,21 +23,21 @@ entry: %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2] %7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1] %8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1] - %9 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %8) ; <<4 x i32>> [#uses=1] + %9 = sext <4 x i16> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] %10 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1] %11 = bitcast double %10 to <4 x i16> ; <<4 x i16>> [#uses=1] - %12 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %11) ; <<4 x i32>> [#uses=1] + %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1] %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1] %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1] %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1] %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1) ret void } @@ -45,10 +45,10 @@ define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, entry: ; CHECK: t2: ; CHECK: vld1.16 -; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vld1.16 ; CHECK: vmul.i16 +; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vst1.16 ; CHECK: vst1.16 @@ -57,17 +57,17 @@ entry: %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1] %7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1] - %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7) ; <<8 x i16>> [#uses=1] + %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1] %9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1] %10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1] %11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1) %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1] %13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1) ret void } @@ -77,14 +77,14 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind { ; CHECK: vmul.i8 ; CHECK-NOT: vmov ; CHECK: vst3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1] %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1] %tmp5 = sub <8 x i8> %tmp3, %tmp4 %tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> %tmp4, %tmp2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1) ret <8 x i8> %tmp4 } @@ -97,10 +97,10 @@ entry: ; CHECK-NOT: vmov ; CHECK: bne %tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1] - %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1] %tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1] - %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1] br i1 undef, label %return1, label %return2 @@ -116,7 +116,7 @@ return1: %tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x i32>> [#uses=1] %tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1) ret void return2: @@ -128,7 +128,7 @@ return2: %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1) call void @llvm.trap() unreachable } @@ -143,7 +143,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1] %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2] - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1] %tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1] @@ -156,7 +156,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: vmov d1, d0 ; CHECK-NEXT: vld2.8 {d0[1], d1[1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] %tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1] @@ -174,14 +174,14 @@ entry: ; CHECK: vuzp.32 q0, q1 ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2] - %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1] %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1] %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60) - %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1) + %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1] %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4) + tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1) ret void } @@ -304,44 +304,43 @@ bb14: ; preds = %bb6 ; This test crashes the coalescer because live variables were not updated properly. define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1) %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1) ret <8 x i8> undef } -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly - -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +nounwind -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 1e780e6a9097..6b86f1a9f368 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization" +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization" define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind { entry: diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 29c55c6bd975..7413bed5c5b1 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON define i32 @f1(i32 %a.s) { ;CHECK: f1: @@ -65,3 +66,27 @@ define double @f7(double %a, double %b) { %tmp1 = select i1 %tmp, double -1.000e+00, double %b ret double %tmp1 } + +; <rdar://problem/7260094> +; +; We used to generate really horrible code for this function. The main cause was +; a lack of a custom lowering routine for an ISD::SELECT. This would result in +; two "it" blocks in the code: one for the "icmp" and another to move the index +; into the constant pool based on the value of the "icmp". If we have one "it" +; block generated, odds are good that we have close to the ideal code for this: +; +; CHECK-NEON: _f8: +; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123 +; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0 +; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]] +; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI +; CHECK-NEON-NEXT: it eq +; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4 +; CHECK-NEON-NEXT: ldr +; CHECK-NEON: bx + +define arm_apcscc float @f8(i32 %a) nounwind { + %tmp = icmp eq i32 %a, 1123 + %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 + ret float %tmp1 +} diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index 792ef79982b7..ae1ba2f73825 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll index 848a4dfed054..8b4145914e7c 100644 --- a/test/CodeGen/ARM/t2-imm.ll +++ b/test/CodeGen/ARM/t2-imm.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f6(i32 %a) { ; CHECK:f6 -; CHECK: movw r0, #:lower16:65537123 -; CHECK: movt r0, #:upper16:65537123 +; CHECK: movw r0, #1123 +; CHECK: movt r0, #1000 %tmp = add i32 0, 65537123 ret i32 %tmp } diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll index e2dca4647bce..4fe1c434799d 100644 --- a/test/CodeGen/ARM/vaba.ll +++ b/test/CodeGen/ARM/vaba.ll @@ -6,8 +6,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -16,8 +17,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -26,8 +28,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -36,8 +39,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -46,8 +50,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -56,8 +61,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -66,8 +72,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -76,8 +83,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -86,8 +94,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -96,8 +105,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -106,8 +116,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -116,25 +127,26 @@ define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } -declare <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: vabals8: @@ -142,8 +154,10 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -152,8 +166,10 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -162,8 +178,10 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -172,8 +190,10 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -182,8 +202,10 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -192,14 +214,8 @@ define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } - -declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll index 2b4539361459..9ec734fa7641 100644 --- a/test/CodeGen/ARM/vabd.ll +++ b/test/CodeGen/ARM/vabd.ll @@ -151,8 +151,9 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vabdl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -160,8 +161,9 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -169,8 +171,9 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -178,8 +181,9 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vabdl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -187,8 +191,9 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -196,14 +201,7 @@ define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index 9bb8bf561045..a830e968ff78 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ -157,8 +157,10 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddws8: ;CHECK: vaddw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index c11a67c6c434..e460a84f6265 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp3 } +; Undef shuffle indices should not prevent matching to VEXT: + +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd_undef: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq_undef: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6> + ret <16 x i8> %tmp3 +} + diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index c61ea8c9a789..2488e8a0d0cc 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -3,7 +3,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: ;CHECK: vld1.8 - %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A) + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1) ret <8 x i8> %tmp1 } @@ -11,7 +11,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { ;CHECK: vld1i16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0) + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) ret <4 x i16> %tmp1 } @@ -19,7 +19,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { ;CHECK: vld1i32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0) + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) ret <2 x i32> %tmp1 } @@ -27,7 +27,7 @@ define <2 x float> @vld1f(float* %A) nounwind { ;CHECK: vld1f: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0) + %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1) ret <2 x float> %tmp1 } @@ -35,14 +35,14 @@ define <1 x i64> @vld1i64(i64* %A) nounwind { ;CHECK: vld1i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0) + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) ret <1 x i64> %tmp1 } define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;CHECK: vld1Qi8: ;CHECK: vld1.8 - %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A) + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1) ret <16 x i8> %tmp1 } @@ -50,7 +50,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind { ;CHECK: vld1Qi16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0) + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1) ret <8 x i16> %tmp1 } @@ -58,7 +58,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind { ;CHECK: vld1Qi32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0) + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) ret <4 x i32> %tmp1 } @@ -66,7 +66,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind { ;CHECK: vld1Qf: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0) + %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1) ret <4 x float> %tmp1 } @@ -74,18 +74,31 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind { ;CHECK: vld1Qi64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0) + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) ret <2 x i64> %tmp1 } -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +; Radar 8355607 +; Do not crash if the vld1 result is not used. +define void @unused_vld1_result() { +entry: +;CHECK: unused_vld1_result +;CHECK: vld1.32 + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index 0838636ce742..811f6e6db96f 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind { ;CHECK: vld2i32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld2f(float* %A) nounwind { ;CHECK: vld2f: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { ;CHECK: vld2i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -68,7 +68,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -79,7 +79,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind { ;CHECK: vld2Qi16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -90,7 +90,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind { ;CHECK: vld2Qi32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -101,20 +101,20 @@ define <4 x float> @vld2Qf(float* %A) nounwind { ;CHECK: vld2Qf: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 65a24486bc62..92538c34f5b8 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;CHECK: vld3i16: ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind { ;CHECK: vld3i32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld3f(float* %A) nounwind { ;CHECK: vld3f: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ;CHECK: vld3i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;CHECK: vld3.8 ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind { ;CHECK: vld3.16 ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index e0b870638a18..d1bf957ebadc 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;CHECK: vld4.8 - %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld4f(float* %A) nounwind { ;CHECK: vld4f: ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { ;CHECK: vld4i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;CHECK: vld4.8 ;CHECK: vld4.8 - %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind { ;CHECK: vld4.16 ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld4Qf(float* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index b32c59019f4c..31ee64fa598f 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -13,7 +13,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld2lanei8: ;CHECK: vld2.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -25,7 +25,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -37,7 +37,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 %tmp5 = add <2 x i32> %tmp3, %tmp4 @@ -49,7 +49,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 %tmp5 = fadd <2 x float> %tmp3, %tmp4 @@ -61,7 +61,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -73,7 +73,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -85,21 +85,21 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -114,7 +114,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld3lanei8: ;CHECK: vld3.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 @@ -128,7 +128,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 @@ -142,7 +142,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 @@ -156,7 +156,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 @@ -170,7 +170,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 @@ -184,7 +184,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 @@ -198,7 +198,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 @@ -207,14 +207,14 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp7 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @@ -229,7 +229,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8: ;CHECK: vld4.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 @@ -245,7 +245,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 @@ -261,7 +261,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 @@ -277,7 +277,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 @@ -293,7 +293,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 @@ -309,7 +309,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 @@ -325,7 +325,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 @@ -336,11 +336,11 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp9 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll index 77cf10ad3e68..9c6b210be797 100644 --- a/test/CodeGen/ARM/vmla.ll +++ b/test/CodeGen/ARM/vmla.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlal_lanes16 ; CHECK: vmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlal_lanes32 ; CHECK: vmlal.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlal_laneu16 ; CHECK: vmlal.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlal_laneu32 ; CHECK: vmlal.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll index 2b70a7878ced..65e7fe41bb3a 100644 --- a/test/CodeGen/ARM/vmls.ll +++ b/test/CodeGen/ARM/vmls.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlsl_lanes16 ; CHECK: vmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlsl_lanes32 ; CHECK: vmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlsl_laneu16 ; CHECK: vmlsl.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlsl_laneu32 ; CHECK: vmlsl.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index 5e872ab6d0b2..8cd94576b0c2 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -192,7 +192,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { ;CHECK: vmovls8: ;CHECK: vmovl.s8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1) + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -200,7 +200,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { ;CHECK: vmovls16: ;CHECK: vmovl.s16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1) + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -208,7 +208,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { ;CHECK: vmovls32: ;CHECK: vmovl.s32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1) + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } @@ -216,7 +216,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { ;CHECK: vmovlu8: ;CHECK: vmovl.u8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1) + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -224,7 +224,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { ;CHECK: vmovlu16: ;CHECK: vmovl.u16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1) + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -232,23 +232,15 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { ;CHECK: vmovlu32: ;CHECK: vmovl.u32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1) + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } -declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone - define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { ;CHECK: vmovni16: ;CHECK: vmovn.i16 %tmp1 = load <8 x i16>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1) + %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 } @@ -256,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { ;CHECK: vmovni32: ;CHECK: vmovn.i32 %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1) + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 } @@ -264,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { ;CHECK: vmovni64: ;CHECK: vmovn.i64 %tmp1 = load <2 x i64>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1) + %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone - define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { ;CHECK: vqmovns16: ;CHECK: vqmovn.s16 diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 1d9168021279..5383425018f8 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -152,8 +152,10 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -161,8 +163,10 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -170,8 +174,10 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -179,8 +185,10 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -188,8 +196,10 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -197,8 +207,10 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -215,8 +227,10 @@ entry: ; CHECK: test_vmull_lanes16 ; CHECK: vmull.s16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { @@ -224,8 +238,10 @@ entry: ; CHECK: test_vmull_lanes32 ; CHECK: vmull.s32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { @@ -233,8 +249,10 @@ entry: ; CHECK: test_vmull_laneu16 ; CHECK: vmull.u16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { @@ -242,16 +260,10 @@ entry: ; CHECK: test_vmull_laneu32 ; CHECK: vmull.u32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } -declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index deed554d842c..e1fe64b02d9d 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ret <16 x i8> %tmp2 } + +; Undef shuffle indices should not prevent matching to VREV: + +define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { +;CHECK: test_vrev64D8_undef: +;CHECK: vrev64.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %tmp2 +} + +define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { +;CHECK: test_vrev32Q16_undef: +;CHECK: vrev32.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> + ret <8 x i16> %tmp2 +} diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 95414c308914..2b535ada3072 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst1i8: ;CHECK: vst1.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1) + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1Qi8: ;CHECK: vst1.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) ret void } @@ -75,7 +75,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1) + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) ret void } @@ -84,18 +84,18 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <2 x i64>* %B - call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 3c98a2cbe60d..aed15fd51c56 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2i8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;CHECK: vst2.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -75,17 +75,17 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index 2599bc0db933..1feaed5a1044 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst3.8 ;CHECK: vst3.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index 878f0efaa480..d302f097fc1f 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4i8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4.8 ;CHECK: vst4.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index cf50756d465e..30ec52ac6420 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -4,7 +4,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -58,24 +58,24 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3) + call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1) ret void } -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3lanei8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -84,7 +84,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -93,7 +93,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -102,7 +102,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -111,7 +111,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6) + call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1) ret void } @@ -120,7 +120,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0) + call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) ret void } @@ -129,25 +129,25 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4lanei8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -156,7 +156,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -165,7 +165,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -174,7 +174,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -183,7 +183,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7) + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1) ret void } @@ -192,7 +192,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -201,15 +201,15 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 3416de76f123..df77bb31fc8b 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -157,8 +157,10 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubws8: ;CHECK: vsubw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll index 10bb10ac24a1..b1c2f93b47c6 100644 --- a/test/CodeGen/ARM/vtrn.ll +++ b/test/CodeGen/ARM/vtrn.ll @@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VTRN: + +define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtrni8_undef: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtrnQi16_undef: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 6cef188d76dd..9130f628919a 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VUZP: + +define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vuzpi8_undef: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vuzpQi16_undef: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index a9ecdcab42d7..926970aeb29b 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VZIP: + +define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vzipi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vzipQi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll new file mode 100644 index 000000000000..b838ec949eae --- /dev/null +++ b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=alpha | FileCheck %s + +define fastcc i64 @getcount(i64 %s) { + %tmp431 = mul i64 %s, 12884901888 + ret i64 %tmp431 +} + +; CHECK: sll $16,33,$0 +; CHECK-NEXT: sll $16,32,$1 +; CHECK-NEXT: addq $0,$1,$0 + diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll new file mode 100644 index 000000000000..743292a58d59 --- /dev/null +++ b/test/CodeGen/CellSPU/arg_ret.ll @@ -0,0 +1,33 @@ +; Test parameter passing and return values +;RUN: llc --march=cellspu %s -o - | FileCheck %s + +; this fits into registers r3-r74 +%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} +define ccc i32 @test_regs( %paramstruct %prm ) +{ +;CHECK: lr $3, $74 +;CHECK: bi $lr + %1 = extractvalue %paramstruct %prm, 71 + ret i32 %1 +} + +define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) +{ +;CHECK-NOT: a $3, $74, $75 + %1 = extractvalue %paramstruct %prm, 71 + %2 = add i32 %1, %stackprm + ret i32 %2 +} + +define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) +{ +;CHECK: lqd $75, 80($sp) +;CHECK: lr $3, $4 + ret %paramstruct %prm +} + diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll index 5483f463732b..63293e2aecb1 100644 --- a/test/CodeGen/CellSPU/bigstack.ll +++ b/test/CodeGen/CellSPU/bigstack.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 4 -; RUN: grep il %t1.s | grep -v file | count 7 -; RUN: grep stqx %t1.s | count 2 +; RUN: grep lqx %t1.s | count 3 +; RUN: grep il %t1.s | grep -v file | count 5 +; RUN: grep stqx %t1.s | count 1 define i32 @bigstack() nounwind { entry: diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll index eb7cf2c6467c..559b266e59df 100644 --- a/test/CodeGen/CellSPU/call.ll +++ b/test/CodeGen/CellSPU/call.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s ; RUN: grep brsl %t1.s | count 1 -; RUN: grep brasl %t1.s | count 1 -; RUN: grep stqd %t1.s | count 80 +; RUN: grep brasl %t1.s | count 2 +; RUN: grep stqd %t1.s | count 82 ; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -29,3 +29,25 @@ define i32 @stub_2(...) { entry: ret i32 0 } + +; check that struct is passed in r3-> +; assert this by changing the second field in the struct +%0 = type { i32, i32, i32 } +declare %0 @callee() +define %0 @test_structret() +{ +;CHECK: stqd $lr, 16($sp) +;CHECK: stqd $sp, -48($sp) +;CHECK: ai $sp, $sp, -48 +;CHECK: brasl $lr, callee + %rv = call %0 @callee() +;CHECK: ai $4, $4, 1 +;CHECK: lqd $lr, 64($sp) +;CHECK: ai $sp, $sp, 48 +;CHECK: bi $lr + %oldval = extractvalue %0 %rv, 1 + %newval = add i32 %oldval,1 + %newrv = insertvalue %0 %rv, i32 %newval, 1 + ret %0 %newrv +} + diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index d94d77c9f142..141361d5702b 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -12,7 +12,7 @@ ; RUN: grep rotqby %t2.s | count 5 ; RUN: grep lqd %t2.s | count 13 ; RUN: grep ilhu %t2.s | count 2 -; RUN: grep ai %t2.s | count 8 +; RUN: grep ai %t2.s | count 9 ; RUN: grep dispatch_tab %t2.s | count 6 ; ModuleID = 'call_indirect.bc' diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll index 04accb9c56b8..f37d2ae89b00 100644 --- a/test/CodeGen/CellSPU/shuffles.ll +++ b/test/CodeGen/CellSPU/shuffles.ll @@ -16,3 +16,26 @@ define <4 x float> @splat(float %param1) { ret <4 x float> %val } +define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { + %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 +;CHECK: lqa $6, +;CHECK: shufb $4, $4, $5, $6 + %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 + +;CHECK: cdd $5, 0($3) +;CHECK: lqd $6, 0($3) +;CHECK: shufb $4, $4, $6, $5 +;CHECK: stqd $4, 0($3) +;CHECK: bi $lr + store <2 x float> %sl2_17, <2 x float>* %ptr + ret void +} + +define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { +;CHECK: cwd $5, 4($sp) +;CHECK: shufb $3, $4, $3, $5 +;CHECK: bi $lr + %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 + ret <4 x float> %rv +} + diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll new file mode 100644 index 000000000000..b81c0cdbb299 --- /dev/null +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -0,0 +1,75 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x float> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: fa {{\$.}}, $3, $3 + %1 = fadd %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: fs {{\$.}}, $3, $3 + %1 = fsub %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: fm {{\$.}}, $3, $3 + %1 = fmul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_splat(float %param ) { +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x float> undef, float %param, i32 0 + %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_store(%vec %val, %vec* %ptr){ + +;CHECK: stqd + store %vec undef, %vec* null + +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} + +define %vec @test_insert(){ +;CHECK: cwd +;CHECK: shufb $3 + %rv = insertelement %vec undef, float 0.0e+00, i32 undef +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_unaligned_store() { +;CHECK: cdd $3, 8($3) +;CHECK: lqd +;CHECK: shufb +;CHECK: stqd + %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] + %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1] + %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] + store <2 x float> undef, <2 x float>* %vptr + ret void +} + diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll new file mode 100644 index 000000000000..dd51be5a71d2 --- /dev/null +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -0,0 +1,64 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x i32> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: a {{\$.}}, $3, $3 + %1 = add %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: sf {{\$.}}, $4, $3 + %1 = sub %vec %param, <i32 1, i32 1> + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: mpyu +;CHECK: mpyh +;CHECK: a {{\$., \$., \$.}} +;CHECK: a {{\$., \$., \$.}} + %1 = mul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define <2 x i32> @test_splat(i32 %param ) { +;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the +; somewhat redundant: +;CHECK-NOT or $3, $3, $3 +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x i32> undef, i32 %param, i32 0 + %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret <2 x i32> %rv +} + +define i32 @test_extract() { +;CHECK: shufb $3 + %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1] +;CHECK: bi $lr + ret i32 %rv +} + +define void @test_store( %vec %val, %vec* %ptr) +{ +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} diff --git a/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll new file mode 100644 index 000000000000..a2945aaec331 --- /dev/null +++ b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s + +define float @test1() +{ + ret float extractelement (<2 x float> bitcast (<1 x double> <double 0x3f800000> to <2 x float>), i32 1); +} diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll index 8e7b70e2216f..9d8e391f874e 100644 --- a/test/CodeGen/Mips/2008-06-05-Carry.ll +++ b/test/CodeGen/Mips/2008-06-05-Carry.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i64 @add64(i64 %u, i64 %v) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll index b2aaa00754b7..b1d20d93f187 100644 --- a/test/CodeGen/Mips/2008-07-03-SRet.ll +++ b/test/CodeGen/Mips/2008-07-03-SRet.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.sret0 = type { i32, i32, i32 } define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind { diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll index 6bb6bd862b25..a1f05044b6c6 100644 --- a/test/CodeGen/Mips/2008-07-05-ByVal.ll +++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.byval0 = type { i32, i32 } define i64 @test0(%struct.byval0* byval %b, i64 %sum) nounwind { diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll index 808ce16910ee..ecd8521027af 100644 --- a/test/CodeGen/Mips/2008-07-06-fadd64.ll +++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __adddf3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @dofloat(double %a, double %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll index 7ac0f5f840db..681788e98196 100644 --- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll +++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __extendsfdf2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @dofloat(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll index ca996367733e..d804c7dcf317 100644 --- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll +++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep trunc.w.s | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @fptoint(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll index 20de18a0164c..b8b4c5c610de 100644 --- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll +++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll @@ -5,7 +5,7 @@ ; RUN: grep __fixunsdfsi %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @int2fp(i32 %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll index f6b2045444a5..bda4a3172f30 100644 --- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll +++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll @@ -6,7 +6,7 @@ ; RUN: not grep {gp_rel} %t target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" @.str = internal constant [10 x i8] c"AAAAAAAAA\00" @i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll index 26eb4db26d4d..91efd68622a2 100644 --- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll +++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll @@ -10,7 +10,7 @@ ; RUN: grep {\%lo} %t1 | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.anon = type { i32, i32 } @s0 = global [8 x i8] c"AAAAAAA\00", align 4 diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll index 59599b399c29..41ae5dd65f51 100644 --- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll +++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll @@ -3,7 +3,7 @@ ; RUN: grep seb %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i8 @A(i8 %e.0, i8 signext %sum) signext nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll index 21ff96005421..20bd88889061 100644 --- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll +++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll @@ -2,7 +2,7 @@ ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2 ; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @F(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index 80101fa25b3e..ca837ffd2a50 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -3,7 +3,7 @@ ; RUN: grep {bc1\[tf\]} %t | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index 042cad60e2b0..52a4b081ddb3 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b, i32 %j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll index 77680bccf976..47382f989ca4 100644 --- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll +++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll @@ -3,7 +3,7 @@ ; RUN: grep neg.s %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %i, float %j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll index cd35ccaee83d..23ed64a96d8e 100644 --- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll +++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll @@ -4,7 +4,7 @@ ; RUN: grep multu %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.DWstruct = type { i32, i32 } define i32 @A0(i32 %u, i32 %v) nounwind { diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll index 2f33e9bea73f..0fc45f7d1b05 100644 --- a/test/CodeGen/Mips/2008-08-03-fabs64.ll +++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll @@ -3,7 +3,7 @@ ; RUN: grep {ori.*65535} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @A(double %c, double %d) nounwind readnone { entry: diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll index ca90b500f050..f8eb02855979 100644 --- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll +++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll @@ -3,7 +3,7 @@ ; RUN: grep mfc1 %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll index 79e49a3d682e..7be7974e0ffe 100644 --- a/test/CodeGen/Mips/2008-08-06-Alloca.ll +++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @twoalloca(i32 %size) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll index 54d454cc3ade..63c25951423a 100644 --- a/test/CodeGen/Mips/2008-08-07-CC.ll +++ b/test/CodeGen/Mips/2008-08-07-CC.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define internal fastcc i32 @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll index f3bb965cdb69..67f86d741141 100644 --- a/test/CodeGen/Mips/2008-08-07-FPRound.ll +++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @round2float(double %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll index 1da1db24bf5a..fb3332329d6c 100644 --- a/test/CodeGen/Mips/2008-08-08-ctlz.ll +++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep clz | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @A0(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll new file mode 100644 index 000000000000..8b7f9a919378 --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Select.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s +; Fix PR7473 + +define i32 @main() nounwind readnone { +entry: + %a = alloca i32, align 4 ; <i32*> [#uses=2] + %c = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 1, i32* %a, align 4 + volatile store i32 0, i32* %c, align 4 + %0 = volatile load i32* %a, align 4 ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] +; CHECK: addiu $4, $zero, 3 + %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; <i32> [#uses=1] + %2 = volatile load i32* %c, align 4 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] +; CHECK: addu $4, $zero, $3 +; CHECK: addu $2, $5, $4 + %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; <i32> [#uses=1] + %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; <i32> [#uses=1] + ret i32 %4 +} diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll new file mode 100644 index 000000000000..07fc10cae180 --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s + +define i32 @main() nounwind readnone { +entry: + %x = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 2, i32* %x, align 4 + %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1] +; CHECK: lui $3, %hi($JTI0_0) +; CHECK: sll $2, $2, 2 +; CHECK: addiu $3, $3, %lo($JTI0_0) + switch i32 %0, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb1: ; preds = %entry + ret i32 2 + +; CHECK: $BB0_2 +bb2: ; preds = %entry + ret i32 0 + +bb3: ; preds = %entry + ret i32 3 + +bb4: ; preds = %entry + ret i32 4 + +bb5: ; preds = %entry + ret i32 1 +} diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll deleted file mode 100644 index db2ab877ff7d..000000000000 --- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=ppc32 | grep nop -target triple = "powerpc-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll new file mode 100644 index 000000000000..3a2907d5d7b9 --- /dev/null +++ b/test/CodeGen/PowerPC/empty-functions.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: nop diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll index 32c6f4809cb4..399f19f8d2e2 100644 --- a/test/CodeGen/PowerPC/vec_constants.ll +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI -define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { +define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] %tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] store <4 x i32> %tmp4, <4 x i32>* %P1 @@ -15,26 +15,30 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { ret void } -define <4 x i32> @test_30() { +define <4 x i32> @test_30() nounwind { ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > } -define <4 x i32> @test_29() { +define <4 x i32> @test_29() nounwind { ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > } -define <8 x i16> @test_n30() { +define <8 x i16> @test_n30() nounwind { ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > } -define <16 x i8> @test_n104() { +define <16 x i8> @test_n104() nounwind { ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > } -define <4 x i32> @test_vsldoi() { +define <4 x i32> @test_vsldoi() nounwind { ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > } -define <4 x i32> @test_rol() { +define <8 x i16> @test_vsldoi_65023() nounwind { + ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} + +define <4 x i32> @test_rol() nounwind { ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > } diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll index cf12063e5d4c..eabeb0a42254 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {st %} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll index 1e6232a62550..53bb641cf1eb 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {sth.%} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll index e0bc302c7314..ac6067abbee0 100644 --- a/test/CodeGen/SystemZ/07-BrUnCond.ll +++ b/test/CodeGen/SystemZ/07-BrUnCond.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo() noreturn nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll index 27189ab41567..30810ce6eb90 100644 --- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll +++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo(i64 %N) nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll index 6e0c1ab2c165..50a26e2a451a 100644 --- a/test/CodeGen/SystemZ/09-Globals.ll +++ b/test/CodeGen/SystemZ/09-Globals.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | grep larl | count 3 target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @bar = common global i64 0, align 8 ; <i64*> [#uses=3] define i64 @foo() nounwind readonly { diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll index cc325389d787..f291e5ff42b6 100644 --- a/test/CodeGen/SystemZ/10-FuncsPic.ll +++ b/test/CodeGen/SystemZ/10-FuncsPic.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -relocation-model=pic | grep PLT | count 1 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @ptr = external global void (...)* ; <void (...)**> [#uses=2] define void @foo1() nounwind { diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll index a77671e2ba7b..c581ad9c4578 100644 --- a/test/CodeGen/SystemZ/10-GlobalsPic.ll +++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @src = external global i32 ; <i32*> [#uses=2] @dst = external global i32 ; <i32*> [#uses=2] @ptr = external global i32* ; <i32**> [#uses=2] diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll index 609d9dcf59c5..b170a8044a9d 100644 --- a/test/CodeGen/SystemZ/11-BSwap.ll +++ b/test/CodeGen/SystemZ/11-BSwap.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i16 @foo(i16 zeroext %a) zeroext { diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll index 07a164d42645..54424e18f68b 100644 --- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll +++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=systemz | grep rll target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll index 99d0ee7b03d9..89b22251eb23 100644 --- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll +++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind { entry: diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll index a35167fba04f..68ccb848980c 100644 --- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll +++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind { entry: diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index b37f7e92d5fb..98feb83231dc 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll index 5457b12afcba..f4e176eb4421 100644 --- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll +++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define float @foo(i32 signext %a) { entry: diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll index a91e29ea4f9d..63fd8553b32e 100644 --- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll +++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @dfg_parse() nounwind { entry: diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll index 2074bfd5d7b9..929c472d1ef6 100644 --- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll +++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s | not grep r11 -target triple = "thumb-linux-gnueabi" +target triple = "thumb-unknown-linux-gnueabi" %struct.__sched_param = type { i32 } %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } @i.1882 = internal global i32 1 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll new file mode 100644 index 000000000000..9a6321bb43c4 --- /dev/null +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -0,0 +1,147 @@ +; RUN: llc -mtriple=thumbv6-apple-darwin10 < %s | FileCheck %s +; RUN: opt -strip-debug < %s | llc -mtriple=thumbv6-apple-darwin10 | FileCheck %s +; Stripping out debug info formerly caused the last two multiplies to be emitted in +; the other order. 7797940 (part of it dated 6/29/2010..7/15/2010). + +%0 = type { [3 x double] } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%0*, i32, i32)* @_Z19getClosestDiagonal3ii to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { +; CHECK: blx ___muldf3 +; CHECK: blx ___muldf3 +; CHECK: beq LBB0_8 +; CHECK: blx ___muldf3 +; <label>:3 + switch i32 %1, label %4 [ + i32 0, label %5 + i32 3, label %5 + ] + +; <label>:4 ; preds = %3 + br label %5, !dbg !0 + +; <label>:5 ; preds = %4, %3, %3 + %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1] + %v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1] + %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0 + %v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1] + %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1] + %v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1] + %v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3] + %v_17 = fmul double %storemerge, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_17, double* %v_8, align 4, !dbg !97 + %v_19 = fmul double %storemerge2, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_19, double* %v_10, align 4, !dbg !97 + ret void, !dbg !98 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare double @sqrt(double) nounwind readonly + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 46, i32 0, metadata !1, null} +!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !22, metadata !22} +!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90} +!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] +!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] +!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ] +!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{null, metadata !19, metadata !20} +!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ] +!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ] +!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{null, metadata !19} +!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ] +!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13} +!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ] +!32 = metadata !{metadata !13, metadata !33} +!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] +!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ] +!39 = metadata !{metadata !40, metadata !19} +!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ] +!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ] +!45 = metadata !{null, metadata !19, metadata !13} +!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ] +!50 = metadata !{null, metadata !19, metadata !51} +!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ] +!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ] +!56 = metadata !{metadata !51, metadata !33} +!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ] +!59 = metadata !{metadata !8, metadata !33} +!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ] +!62 = metadata !{metadata !13, metadata !33, metadata !22} +!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ] +!65 = metadata !{metadata !40, metadata !19, metadata !22} +!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ] +!68 = metadata !{metadata !69, metadata !19, metadata !51} +!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ] +!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ] +!73 = metadata !{metadata !69, metadata !19, metadata !13} +!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!75 = metadata !{i32 524334, i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ] +!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13} +!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ] +!87 = metadata !{metadata !22, metadata !33} +!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] +!92 = metadata !{i32 48, i32 0, metadata !1, null} +!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96} +!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!96 = metadata !{i32 51, i32 0, metadata !1, null} +!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} +!98 = metadata !{i32 52, i32 0, metadata !1, null} diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll new file mode 100644 index 000000000000..c611b865f67d --- /dev/null +++ b/test/CodeGen/Thumb/barrier.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; V6: t1: +; V6: blx {{_*}}sync_synchronize + +; V6M: t1: +; V6M: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; V6: t2: +; V6: blx {{_*}}sync_synchronize + +; V6M: t2: +; V6M: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index acfdc917ddf0..5c8ad974bc0e 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=thumb | not grep {ldr sp} ; RUN: llc < %s -mtriple=thumb-apple-darwin | \ ; RUN: not grep {sub.*r7} -; RUN: llc < %s -march=thumb | grep 4294967280 +; RUN: llc < %s -march=thumb | grep {mov.*r6, sp} %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 02de36af1cc7..b289484f5efb 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -1,20 +1,35 @@ -; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5 +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s define void @test1() { +; CHECK: test1: +; CHECK: sub sp, #256 +; CHECK: add sp, #256 %tmp = alloca [ 64 x i32 ] , align 4 ret void } define void @test2() { +; CHECK: test2: +; CHECK: ldr r0, LCPI +; CHECK: add sp, r0 +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } define i32 @test3() { - %retval = alloca i32, align 4 - %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 - store i32 0, i32* %tmp - %tmp1 = load i32* %tmp - ret i32 %tmp1 +; CHECK: test3: +; CHECK: ldr r2, LCPI +; CHECK: add sp, r2 +; CHECK: ldr r1, LCPI +; CHECK: add r1, sp +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 } diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll index 16a9c4442d8a..c2ba208e4ae2 100644 --- a/test/CodeGen/Thumb/vargs.ll +++ b/test/CodeGen/Thumb/vargs.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=thumb -; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1 +; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2 ; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2 @str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index 98a5263c2f99..45d356c3dc67 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -11,8 +11,8 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq -; CHECK-NEXT: subeq.w r0, r6, r8 -; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc} +; CHECK-NEXT: subeq r0, r6, r7 +; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll index 3f1b9eb8d9d0..2246de35e03c 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -7,17 +7,12 @@ define void @t() nounwind ssp { entry: ; CHECK: t: -; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 -; Yes, this is stupid codegen, but it's correct. -; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 %size = mul i32 8, 2 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_a = alloca i8, i32 %size, align 8 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_b = alloca i8, i32 %size, align 8 unreachable } diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll new file mode 100644 index 000000000000..abcf13a3e38f --- /dev/null +++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s + +@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1] + +define internal fastcc i32 @Callee(i32 %i) nounwind { +entry: +; CHECK: Callee: + %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1] + br i1 %0, label %bb2, label %bb + +bb: ; preds = %entry + %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1] + %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2] + %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0] + %3 = load i8* %.sub, align 4 ; <i8> [#uses=1] + %4 = sext i8 %3 to i32 ; <i32> [#uses=1] + ret i32 %4 + +bb2: ; preds = %entry +; Must restore sp from fp here +; CHECK: mov sp, r7 +; CHECK: sub sp, #8 +; CHECK: pop + ret i32 0 +} + +declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind + +define i32 @main() nounwind { +; CHECK: main: +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2] + %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1] + %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1] + %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2] + %3 = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb +; No need to restore sp from fp here. +; CHECK: printf +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub sp, #12 +; CHECK: pop + %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll new file mode 100644 index 000000000000..22473bb35a0a --- /dev/null +++ b/test/CodeGen/Thumb2/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: movs r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: lsrs r1, r1, #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: lsrs r2, r0, #7 +; CHECK: mov r0, r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll new file mode 100644 index 000000000000..f7ec5a3b577c --- /dev/null +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 + + +define float @foo(float %a, float %b) { +entry: +; CHECK: foo +; CORTEXM3: blx ___mulsf3 +; CORTEXM4: vmul.f32 s0, s1, s0 +; CORTEXA8: vmul.f32 d0, d1, d0 + %0 = fmul float %a, %b + ret float %0 +} + +define double @bar(double %a, double %b) { +entry: +; CHECK: bar + %0 = fmul double %a, %b +; CORTEXM3: blx ___muldf3 +; CORTEXM4: blx ___muldf3 +; CORTEXA8: vmul.f64 d0, d1, d0 + ret double %0 +} diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll index 87af9d10572b..d8b51ec82ded 100644 --- a/test/CodeGen/Thumb2/crash.ll +++ b/test/CodeGen/Thumb2/crash.ll @@ -14,11 +14,11 @@ entry: %6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1] %7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1] %8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7) + tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind @sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5] @dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2] @@ -44,6 +44,6 @@ bb2: ; preds = %bb %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind + tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind ret i32 0 } diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll index 0cddd489fb46..e63a115273ff 100644 --- a/test/CodeGen/Thumb2/div.ll +++ b/test/CodeGen/Thumb2/div.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMB -; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \ +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M define i32 @f1(i32 %a, i32 %b) { diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 29b8e75cb8b3..650d788cb4d2 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,7 +22,7 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #28] +; CHECK: ldr.w {{(r[0-9])|(lr)}}, [r7, #28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br label %bb20 @@ -46,9 +46,9 @@ bb119: ; preds = %bb20, %bb20 bb420: ; preds = %bb20, %bb20 ; CHECK: bb420 -; CHECK: str r{{[0-7]}}, [sp] -; CHECK: str r{{[0-7]}}, [sp, #4] -; CHECK: str r{{[0-7]}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8] ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7fa782f91de9..ad957a1fcb45 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -21,8 +21,8 @@ entry: bb: ; preds = %bb, %entry ; CHECK: LBB0_1: ; CHECK: cmp r2, #0 -; CHECK: sub.w r9, r2, #1 -; CHECK: mov r2, r9 +; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1 +; CHECK: mov r2, [[REGISTER]] %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll new file mode 100644 index 000000000000..fde2ee0ab0c9 --- /dev/null +++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s +; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375. +; Eventually this should become the default and be moved into machine-licm.ll. +; FIXME: the vdup should be hoisted out of the loop, 8248029. + +define void @t2(i8* %ptr1, i8* %ptr2) nounwind { +entry: +; CHECK: t2: +; CHECK: mov.w r3, #1065353216 + br i1 undef, label %bb1, label %bb2 + +bb1: +; CHECK-NEXT: %bb1 +; CHECK: vdup.32 q1, r3 + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tmp1 = shl i32 %indvar, 2 + %gep1 = getelementptr i8* %ptr1, i32 %tmp1 + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) + %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) + %gep2 = getelementptr i8* %ptr2, i32 %tmp1 + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) + %indvar.next = add i32 %indvar, 1 + %cond = icmp eq i32 %indvar.next, 10 + br i1 %cond, label %bb2, label %bb1 + +bb2: + ret void +} + +; CHECK-NOT: LCPI1_0: +; CHECK: .subsections_via_symbols + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index cdb3041b3bea..b949b2f30506 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -64,10 +64,10 @@ bb1: %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 - %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1) + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) %gep2 = getelementptr i8* %ptr2, i32 %tmp1 - call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3) + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) %indvar.next = add i32 %indvar, 1 %cond = icmp eq i32 %indvar.next, 10 br i1 %cond, label %bb2, label %bb1 @@ -79,8 +79,8 @@ bb2: ; CHECK: LCPI1_0: ; CHECK: .section -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll index 76c56d00473d..7b0432de9bb5 100644 --- a/test/CodeGen/Thumb2/thumb2-and2.ll +++ b/test/CodeGen/Thumb2/thumb2-and2.ll @@ -30,7 +30,7 @@ define i32 @f4(i32 %a) { ret i32 %tmp } ; CHECK: f4: -; CHECK: and r0, r0, #1448498774 +; CHECK: bic r0, r0, #-1448498775 ; 66846720 = 0x03fc0000 define i32 @f5(i32 %a) { diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll new file mode 100644 index 000000000000..4df06b836fc5 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +define void @b(i32 %x) nounwind optsize { +entry: +; CHECK: b +; CHECK: mov r2, sp +; CHECK: mls r0, r0, r1, r2 +; CHECK: mov sp, r0 + %0 = mul i32 %x, 24 ; <i32> [#uses=1] + %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] + call arm_aapcscc void @a(i8* %vla) nounwind optsize + ret void +} + +declare void @a(i8*) optsize diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll new file mode 100644 index 000000000000..a54d09e62919 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-barrier.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; CHECK: t1: +; CHECK: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; CHECK: t2: +; CHECK: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll index 24502b0338c2..2e4da1b289b5 100644 --- a/test/CodeGen/Thumb2/thumb2-call-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN ; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll index d4773bb5809b..63249f4cf145 100644 --- a/test/CodeGen/Thumb2/thumb2-cmp.ll +++ b/test/CodeGen/Thumb2/thumb2-cmp.ll @@ -39,3 +39,17 @@ define i1 @f5(i32 %a) { %tmp = icmp eq i32 %a, 1114112 ret i1 %tmp } + +; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform. +; +; CHECK: f6: +; CHECK-NOT: cmp.w r0, #-2147483648 +; CHECK: bx lr +define i32 @f6(i32 %a) { + %tmp = icmp sgt i32 %a, 2147483647 + br i1 %tmp, label %true, label %false +true: + ret i32 2 +false: + ret i32 0 +} diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll index c02441547718..5315535db045 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; XFAIL: * define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll index c8302df78f68..2e8bb1d60934 100644 --- a/test/CodeGen/Thumb2/thumb2-pack.ll +++ b/test/CodeGen/Thumb2/thumb2-pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 3946371709d5..4f92c9333806 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}[{{.*}}, :128] ; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 1fa4e5c21dab..2074f98cb608 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -6,7 +6,7 @@ define i32 @test1(i32 %x) { ; ARMv7A: uxtb16 r0, r0 ; ARMv7M: test1 -; ARMv7M: and r0, r0, #16711935 +; ARMv7M: bic r0, r0, #-16711936 %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll index 2d7bd27d24bd..35b0159d39c6 100644 --- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll +++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 | grep setnp -; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: not grep setnp define i32 @test(float %f) { diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll deleted file mode 100644 index 2680b1543fbb..000000000000 --- a/test/CodeGen/X86/2007-06-14-branchfold.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp -; check that branch folding understands FP_REG_KILL is not a branch - -target triple = "i686-pc-linux-gnu" - %struct.FRAME.c34003a = type { float, float } -@report_E = global i8 0 ; <i8*> [#uses=0] - -define void @main() { -entry: - %FRAME.31 = alloca %struct.FRAME.c34003a, align 8 ; <%struct.FRAME.c34003a*> [#uses=4] - %tmp20 = call i32 @report__ident_int( i32 -50 ) ; <i32> [#uses=1] - %tmp2021 = sitofp i32 %tmp20 to float ; <float> [#uses=5] - %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond = or i1 %tmp23, %tmp26 ; <i1> [#uses=1] - br i1 %bothcond, label %bb, label %bb30 - -bb: ; preds = %entry - unwind - -bb30: ; preds = %entry - %tmp35 = call i32 @report__ident_int( i32 50 ) ; <i32> [#uses=1] - %tmp3536 = sitofp i32 %tmp35 to float ; <float> [#uses=4] - %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond226 = or i1 %tmp38, %tmp44 ; <i1> [#uses=1] - br i1 %bothcond226, label %bb47, label %bb49 - -bb47: ; preds = %bb30 - unwind - -bb49: ; preds = %bb30 - %tmp60 = fcmp ult float %tmp3536, %tmp2021 ; <i1> [#uses=1] - %tmp60.not = xor i1 %tmp60, true ; <i1> [#uses=1] - %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond227 = and i1 %tmp65, %tmp60.not ; <i1> [#uses=1] - br i1 %bothcond227, label %cond_true68, label %cond_next70 - -cond_true68: ; preds = %bb49 - unwind - -cond_next70: ; preds = %bb49 - %tmp71 = call i32 @report__ident_int( i32 -30 ) ; <i32> [#uses=1] - %tmp7172 = sitofp i32 %tmp71 to float ; <float> [#uses=3] - %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond228 = or i1 %tmp74, %tmp80 ; <i1> [#uses=1] - br i1 %bothcond228, label %bb83, label %bb85 - -bb83: ; preds = %cond_next70 - unwind - -bb85: ; preds = %cond_next70 - %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1 ; <float*> [#uses=3] - store float %tmp7172, float* %tmp90 - %tmp92 = call i32 @report__ident_int( i32 30 ) ; <i32> [#uses=1] - %tmp9293 = sitofp i32 %tmp92 to float ; <float> [#uses=7] - %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond229 = or i1 %tmp95, %tmp101 ; <i1> [#uses=1] - br i1 %bothcond229, label %bb104, label %bb106 - -bb104: ; preds = %bb85 - unwind - -bb106: ; preds = %bb85 - %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0 ; <float*> [#uses=2] - store float %tmp9293, float* %tmp111 - %tmp123 = load float* %tmp90 ; <float> [#uses=4] - %tmp125 = fcmp ult float %tmp9293, %tmp123 ; <i1> [#uses=1] - br i1 %tmp125, label %cond_next147, label %cond_true128 - -cond_true128: ; preds = %bb106 - %tmp133 = fcmp olt float %tmp123, %tmp2021 ; <i1> [#uses=1] - %tmp142 = fcmp ogt float %tmp9293, %tmp3536 ; <i1> [#uses=1] - %bothcond230 = or i1 %tmp133, %tmp142 ; <i1> [#uses=1] - br i1 %bothcond230, label %bb145, label %cond_next147 - -bb145: ; preds = %cond_true128 - unwind - -cond_next147: ; preds = %cond_true128, %bb106 - %tmp157 = fcmp ugt float %tmp123, -3.000000e+01 ; <i1> [#uses=1] - %tmp165 = fcmp ult float %tmp9293, -3.000000e+01 ; <i1> [#uses=1] - %bothcond231 = or i1 %tmp157, %tmp165 ; <i1> [#uses=1] - br i1 %bothcond231, label %bb168, label %bb169 - -bb168: ; preds = %cond_next147 - unwind - -bb169: ; preds = %cond_next147 - %tmp176 = fcmp ugt float %tmp123, 3.000000e+01 ; <i1> [#uses=1] - %tmp184 = fcmp ult float %tmp9293, 3.000000e+01 ; <i1> [#uses=1] - %bothcond232 = or i1 %tmp176, %tmp184 ; <i1> [#uses=1] - br i1 %bothcond232, label %bb187, label %bb188 - -bb187: ; preds = %bb169 - unwind - -bb188: ; preds = %bb169 - %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 ) ; <float> [#uses=2] - %tmp194 = load float* %tmp90 ; <float> [#uses=1] - %tmp196 = fcmp ugt float %tmp194, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp196, label %bb207, label %cond_next200 - -cond_next200: ; preds = %bb188 - %tmp202 = load float* %tmp111 ; <float> [#uses=1] - %tmp204 = fcmp ult float %tmp202, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp204, label %bb207, label %bb208 - -bb207: ; preds = %cond_next200, %bb188 - unwind - -bb208: ; preds = %cond_next200 - %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 ) ; <float> [#uses=1] - %tmp214 = fcmp oge float %tmp212, %tmp192 ; <i1> [#uses=1] - %tmp217 = fcmp oge float %tmp192, 1.000000e+02 ; <i1> [#uses=1] - %tmp221 = or i1 %tmp214, %tmp217 ; <i1> [#uses=1] - br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock - -cond_true224: ; preds = %bb208 - call void @abort( ) noreturn - ret void - -UnifiedReturnBlock: ; preds = %bb208 - ret void -} - -declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x) - -declare i32 @report__ident_int(i32 %x) - -declare void @abort() noreturn diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll deleted file mode 100644 index b936686798f0..000000000000 --- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=x86 | grep nop -target triple = "i686-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll index 99cb8569b3f4..99cb8569b3f4 100644 --- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll +++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll diff --git a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll index 36cc53545103..36cc53545103 100644 --- a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll +++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 4a97ac35afc7..bb01e5afceff 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm} ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll index 8d426271a194..28539307aa40 100644 --- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll +++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s ; Check the register copy comes after the call to f and before the call to g ; PR3784 diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll index da493d4910e1..b13d33eb3fd9 100644 --- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll +++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s ; Check that register copies in the landing pad come after the EH_LABEL declare i32 @f() diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index b5873bae5f05..90dabb8ab635 100644 --- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t ; RUN: not grep {%rsp} %t ; RUN: not grep {%rbp} %t diff --git a/test/DebugInfo/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index 001f853dd236..85ee091c3478 100644 --- a/test/DebugInfo/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -1,7 +1,4 @@ -; RUN: llc -O0 < %s | FileCheck %s -; ModuleID = 'try.c' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.8" +; RUN: llc -march=x86 -O0 < %s | FileCheck %s ; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will ; generate DWARF and this test will need to be modified or removed. diff --git a/test/DebugInfo/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll index 70103e5f72bd..2113263c0ac3 100644 --- a/test/DebugInfo/2010-02-01-DbgValueCrash.ll +++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll @@ -1,6 +1,5 @@ ; RUN: llc -O1 < %s ; ModuleID = 'pr6157.bc' -target triple = "x86_64-unknown-linux-gnu" ; formerly crashed in SelectionDAGBuilder %tart.reflect.ComplexType = type { double, double } diff --git a/test/DebugInfo/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 52e948428952..d2115496f8f4 100644 --- a/test/DebugInfo/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 < %s -mtriple=x86_64-apple-darwin | grep debug_loc12 +; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12 ; Test to check .debug_loc support. This test case emits 13 debug_loc entries. %0 = type { double } diff --git a/test/DebugInfo/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index 80643d0792ac..80643d0792ac 100644 --- a/test/DebugInfo/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll diff --git a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index 812d3720d6f5..812d3720d6f5 100644 --- a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll new file mode 100644 index 000000000000..be7d94c4f291 --- /dev/null +++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mcpu=i486 +; PR7375 +; +; This function contains a block (while.cond) with a lonely RFP use that is +; not a kill. We still need an FP_REG_KILL for that block since the register +; allocator will insert a reload. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 { +entry: + %tmp2 = load double* %t ; <double> [#uses=1] + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %bb.nph + +while.cond: ; preds = %bb.nph, %while.cond + store double %tmp2, double* undef + br i1 undef, label %if.end, label %while.cond + +bb.nph: ; preds = %if.then + br label %while.cond + +if.end: ; preds = %while.cond, %if.then, %entry + ret void +} diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll new file mode 100644 index 000000000000..3ac4cf5964c3 --- /dev/null +++ b/test/CodeGen/X86/2010-07-15-Crash.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null +; PR7653 + +@__FUNCTION__.1623 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll new file mode 100644 index 000000000000..96016cfe1c73 --- /dev/null +++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @extend2bit_v2(i32 %val) { +entry: + %0 = trunc i32 %val to i2 ; <i2> [#uses=1] + %1 = sext i2 %0 to i32 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + ret i32 %3 +} + +; CHECK: extend2bit_v2: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll new file mode 100644 index 000000000000..1919d2ef34ae --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; PR7814 + +@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1] +@g_38 = global i32 0, align 4 ; <i32*> [#uses=2] +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %tmp = load i64* @g_16 ; <i64> [#uses=1] + %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1] + %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1] + %and = and i64 %conv, 150 ; <i64> [#uses=1] + %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1] + %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +; CHECK: andl $150 +; CHECK-NEXT: testb +; CHECK-NEXT: jg + +entry.if.end_crit_edge: ; preds = %entry + %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1] + br label %if.end + +if.then: ; preds = %entry + store i32 1, i32* @g_38 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1] + %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll new file mode 100644 index 000000000000..98a0887c0e69 --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-pc-mingw32 + +define void @func() nounwind { +invoke.cont: + %call = tail call i8* @malloc() + %a = invoke i32 @bar() + to label %bb1 unwind label %lpad + +bb1: + ret void + +lpad: + %exn = tail call i8* @llvm.eh.exception() nounwind + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind + %ehspec.fails = icmp slt i32 %eh.selector, 0 + br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup + +cleanup: + tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind + unreachable + +ehspec.unexpected: + tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind + unreachable +} + +declare noalias i8* @malloc() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare void @__cxa_call_unexpected(i8*) + +declare i32 @bar() diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll new file mode 100644 index 000000000000..d98ef14e108b --- /dev/null +++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; CHECK: DW_TAG_constant +; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name + +define void @foo() nounwind ssp { +entry: + call void @bar(i32 201), !dbg !8 + ret void, !dbg !8 +} + +declare void @bar(i32) + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!5} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ] +!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] +!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 3, i32 14, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll new file mode 100644 index 000000000000..e5542baf2ee8 --- /dev/null +++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef. + +define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp { + %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3] + %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1] +; CHECK: movl (%r{{.*}}), % + %x4 = load i32* %h, align 4 ; <i32> [#uses=1] + +; The imull clobbers a 32-bit register. +; CHECK: imull %{{...}}, %e[[CLOBBER:..]] + %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1] + +; So we cannot use the corresponding 64-bit register anymore. +; CHECK-NOT: shrq $32, %r[[CLOBBER]] + %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1] + %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] + +; CHECK: idiv + %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1] + store i32 %x6, i32* %w, align 4 + ret void +} + +declare i64 @g(i8*, i8*) diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp index f2005891a59a..629a14773615 100644 --- a/test/CodeGen/X86/GC/dg.exp +++ b/test/CodeGen/X86/GC/dg.exp @@ -1,3 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll new file mode 100644 index 000000000000..728e37736018 --- /dev/null +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink} + +define fastcc void @t() nounwind ssp { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %bb + unreachable + +bb4: ; preds = %entry + br i1 undef, label %bb.nph, label %return + +bb.nph: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb9, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] + %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] + %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] + br i1 undef, label %bb9, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7, %bb6, %bb5 + br i1 undef, label %bb5, label %return + +return: ; preds = %bb9, %bb4, %bb + ret void +} diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll new file mode 100644 index 000000000000..a72160be719a --- /dev/null +++ b/test/CodeGen/X86/avx-128.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@z = common global <4 x float> zeroinitializer, align 16 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vpxor + ; CHECK: vmovaps + store <4 x float> zeroinitializer, <4 x float>* @z, align 16 + ret void +} + diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll new file mode 100644 index 000000000000..20d31e738857 --- /dev/null +++ b/test/CodeGen/X86/avx-256.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@x = common global <8 x float> zeroinitializer, align 32 +@y = common global <4 x double> zeroinitializer, align 32 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + ; CHECK: vmovaps + store <8 x float> zeroinitializer, <8 x float>* @x, align 32 + store <4 x double> zeroinitializer, <4 x double>* @y, align 32 + ret void +} + diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll new file mode 100644 index 000000000000..9de90237d146 --- /dev/null +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -0,0 +1,2587 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdec + %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdeclast + %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenc + %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenclast + %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) { + ; CHECK: vaesimc + %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) { + ; CHECK: vaeskeygenassist + %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsd + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordsd + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { + ; CHECK: vcvtpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { + ; CHECK: vcvtpd2ps + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { + ; CHECK: vcvtsd2ss + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { + ; CHECK: vcvtss2sd + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { + ; CHECK: vcvttpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdivsd + %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovdqu + %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovupd + %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly + + +define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { + ; CHECK: pushl + ; CHECK: movl + ; CHECK: vmaskmovdqu + ; CHECK: popl + call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) + ret void +} +declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxpd + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxsd + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminpd + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminsd + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: movl + ; CHECK: vmovntdq + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind + + +define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovntpd + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmulsd + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackssdw + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpacksswb + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpackuswb + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddsb + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddsw + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddusb + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddusw + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpavgb + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpavgw + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpeqb + %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpeqd + %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpeqw + %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpgtb + %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpgtd + %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpgtw + %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddwd + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxsw + %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxub + %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminsw + %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminub + %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { + ; CHECK: vpmovmskb + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhw + %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhuw + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuludq + %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsadbw + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubsb + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubsw + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubusb + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubusw + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { + ; CHECK: vsqrtsd + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: movl + ; CHECK: vmovq + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + +define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: movl + ; CHECK: vmovdqu + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + + +define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovupd + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vsubsd + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddsubps + %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhaddpd + %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhaddps + %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhsubpd + %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhsubps + %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vlddqu + %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vblendpd + %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vblendps + %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vblendvpd + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vblendvps + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdppd + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdpps + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertps + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovntdqa + %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly + + +define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vmpsadbw + %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackusdw + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpblendvb + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpblendw + %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpeqq + %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { + ; CHECK: vphminposuw + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxsb + %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxsd + %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxud + %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxuw + %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminsb + %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminsd + %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminud + %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminuw + %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { + ; CHECK: vpmovsxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { + ; CHECK: vpmovsxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { + ; CHECK: vpmovsxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { + ; CHECK: vpmovsxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { + ; CHECK: vpmovsxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { + ; CHECK: vpmovsxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { + ; CHECK: vpmovzxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { + ; CHECK: vpmovzxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { + ; CHECK: vpmovzxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { + ; CHECK: vpmovzxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { + ; CHECK: vpmovzxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { + ; CHECK: vpmovzxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuldq + %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { + ; CHECK: vroundpd + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { + ; CHECK: vroundps + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vroundsd + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vroundss + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpgtq + %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddss + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordps + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordss + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdivss + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vldmxcsr + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + +define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxps + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxss + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminps + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminss + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovntps + call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmulss + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { + ; CHECK: vrcpps + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { + ; CHECK: vrcpss + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { + ; CHECK: vrsqrtss + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { + ; CHECK: vsqrtps + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { + ; CHECK: vsqrtss + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vstmxcsr + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovups + call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vsubss + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { + ; CHECK: vpabsb + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { + ; CHECK: vpabsd + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { + ; CHECK: vpabsw + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddd + %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddsw + %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphaddw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphsubd + %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubsw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddubsw + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhrsw + %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshufb + %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsignb + %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsignd + %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsignw + %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vaddsubps + %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vblendpd + %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vblendps + %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vblendvpd + %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vblendvps + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vcmpordps + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { + ; CHECK: vcvtpd2psy + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvtpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvttpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vdpps + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhaddpd + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhaddps + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhsubpd + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhsubps + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { + ; CHECK: vlddqu + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly + + +define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { + ; CHECK: vmovdqu + %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { + ; CHECK: vmovupd + %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) { + ; CHECK: vmovups + %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vmaxpd + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vmaxps + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vminpd + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vminps + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { + ; CHECK: vmovntdq + call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind + + +define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovntpd + call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovntps + call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { + ; CHECK: vrcpps + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { + ; CHECK: vroundpd + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { + ; CHECK: vroundps + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { + ; CHECK: vsqrtps + %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { + ; CHECK: vmovdqu + call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind + + +define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovupd + call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovups + call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { + ; CHECK: vbroadcastsd + %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly + + +define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { + ; CHECK: vextractf128 + %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { + ; CHECK: vextractf128 + %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { + ; CHECK: vextractf128 + %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { + ; CHECK: vinsertf128 + %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vperm2f128 + %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() { + ; CHECK: vzeroall + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() { + ; CHECK: vzeroupper + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + + diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll new file mode 100644 index 000000000000..b1867105ce85 --- /dev/null +++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s + +define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + + diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll new file mode 100644 index 000000000000..6190c3684ed6 --- /dev/null +++ b/test/CodeGen/X86/barrier-sse.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER + + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false) + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false) + ret void +} diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll new file mode 100644 index 000000000000..fad6ef690c2f --- /dev/null +++ b/test/CodeGen/X86/barrier.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll index 255adfbb2bb4..3857fb157905 100644 --- a/test/CodeGen/X86/call-imm.ll +++ b/test/CodeGen/X86/call-imm.ll @@ -5,7 +5,7 @@ ; Call to immediate is not safe on x86-64 unless we *know* that the ; call will be within 32-bits pcrel from the dest immediate. -; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax} +; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax} ; PR3666 ; PR3773 diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll index 1f7f6ecafafb..1f7f6ecafafb 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll index cb638092ea1a..cb638092ea1a 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll index ae27383895ce..ae27383895ce 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll index 05388f9b2a96..2a44463e5d32 100644 --- a/test/CodeGen/X86/constant-pool-remat-0.ll +++ b/test/CodeGen/X86/constant-pool-remat-0.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 | grep LCPI | count 3 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3 -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 declare float @qux(float %y) diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index f29cbf323e37..96fef0fbfc61 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 +; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll index bdbaac05f118..bf57e78f35d4 100644 --- a/test/CodeGen/X86/dllexport.ll +++ b/test/CodeGen/X86/dllexport.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s ; PR2936 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define dllexport x86_fastcallcc i32 @foo() nounwind { entry: diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll index 1df092018dd8..e577611ebcf1 100644 --- a/test/CodeGen/X86/dyn-stackalloc.ll +++ b/test/CodeGen/X86/dyn-stackalloc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7} -; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16} -; RUN: llc < %s -march=x86-64 | grep {\\-16} +; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7} +; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16} +; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16} define void @t() nounwind { A: diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll new file mode 100644 index 000000000000..b303cd1f7368 --- /dev/null +++ b/test/CodeGen/X86/empty-functions.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP-NOT: movq %rsp, %rbp +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: movq %rsp, %rbp +; CHECK-FP-NEXT: Ltmp1: +; CHECK-FP: nop diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll index 23b45ebb8d8b..9ded7e05dc46 100644 --- a/test/CodeGen/X86/fabs.ll +++ b/test/CodeGen/X86/fabs.ll @@ -1,7 +1,7 @@ ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf ; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \ ; RUN: count 2 -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: grep fabs\$ | count 3 declare float @fabsf(float) diff --git a/test/CodeGen/X86/fast-isel-atomic.ll b/test/CodeGen/X86/fast-isel-atomic.ll new file mode 100644 index 000000000000..74c586846d96 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-atomic.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -march=x86-64 +; rdar://8204072 +; PR7652 + +@sc = external global i8 +@uc = external global i8 + +declare i8 @llvm.atomic.load.and.i8.p0i8(i8* nocapture, i8) nounwind + +define void @test_fetch_and_op() nounwind { +entry: + %tmp40 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1] + store i8 %tmp40, i8* @sc + %tmp41 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1] + store i8 %tmp41, i8* @uc + ret void +} diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll new file mode 100644 index 000000000000..4ab1bc61c7e2 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s +; rdar://8337108 + +; Fast-isel shouldn't try to look through the compare because it's in a +; different basic block, so its operands aren't necessarily exported +; for cross-block usage. + +; CHECK: movb %al, 7(%rsp) +; CHECK: callq {{_?}}bar +; CHECK: movb 7(%rsp), %al + +declare void @bar() + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %q = add i32 %a, 7 + %r = add i32 %b, 9 + %t = icmp ult i32 %q, %r + invoke void @bar() to label %next unwind label %unw +next: + br i1 %t, label %true, label %return +true: + call void @bar() + br label %return +return: + ret void +unw: + unreachable +} diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 1270ab78ab5f..577dd7223a4d 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test1: -; X32: movl (%ecx,%eax,4), %eax +; X32: movl (%eax,%ecx,4), %eax ; X32: ret ; X64: test1: @@ -23,7 +23,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test2: -; X32: movl (%eax,%ecx,4), %eax +; X32: movl (%edx,%ecx,4), %eax ; X32: ret ; X64: test2: diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll index 7759bb056892..5c62c1880516 100644 --- a/test/CodeGen/X86/fast-isel-shift-imm.ll +++ b/test/CodeGen/X86/fast-isel-shift-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax} +; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %e} ; PR3242 define void @foo(i32 %x, i32* %p) nounwind { diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll new file mode 100644 index 000000000000..ffcbf8a908c8 --- /dev/null +++ b/test/CodeGen/X86/force-align-stack.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s +; Tests to make sure that we always align the stack out to the minimum needed - +; in this case 16-bytes. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.3" + +define void @a() nounwind ssp { +entry: +; CHECK: _a: +; CHECK: andl $-16, %esp + %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16 + call void @b(<16 x i8>* %z) nounwind + br label %return + +return: ; preds = %entry + ret void +} + +declare void @b(<16 x i8>*) diff --git a/test/Transforms/LoopStrengthReduce/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll index 1a695f35e3b0..1a695f35e3b0 100644 --- a/test/Transforms/LoopStrengthReduce/insert-positions.ll +++ b/test/CodeGen/X86/insert-positions.ll diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll new file mode 100644 index 000000000000..45a9b0f15c67 --- /dev/null +++ b/test/CodeGen/X86/int-intrinsic.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @llvm.x86.int(i8) nounwind + +; CHECK: int3 +; CHECK: ret +define void @primitive_int3 () { +bb.entry: + call void @llvm.x86.int(i8 3) nounwind + ret void +} + +; CHECK: int $-128 +; CHECK: ret +define void @primitive_int128 () { +bb.entry: + call void @llvm.x86.int(i8 128) nounwind + ret void +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index 71685bb5b83a..b0105ac533bd 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll new file mode 100644 index 000000000000..03468e2b3f4f --- /dev/null +++ b/test/CodeGen/X86/lock-inst-encoding.ll @@ -0,0 +1,22 @@ +; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: f0: +; CHECK: addq %rax, (%rdi) +; CHECK: # encoding: [0xf0,0x48,0x01,0x07] +; CHECK: ret +define void @f0(i64* %a0) { + %t0 = and i64 1, 1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +declare i32 @llvm.atomic.load.and.i32.p0i32(i32* nocapture, i32) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 6c0eb8c0df93..6556fdeea834 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC ; By starting the IV at -64 instead of 0, a cmp is eliminated, ; as the flags from the add can be used directly. diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll new file mode 100644 index 000000000000..4b7050bd507b --- /dev/null +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu + +; The inner loop should require only one add (and no leas either). +; rdar://8100380 + +; CHECK: BB0_4: +; CHECK-NEXT: movb $0, flags(%rdx) +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: cmpq $8192, %rdx +; CHECK-NEXT: jl + +@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp, label %bb, label %bb21 + +bb: ; preds = %entry + br label %bb7 + +bb7: ; preds = %bb, %bb17 + %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2] + %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp9, label %bb10, label %bb17 + +bb10: ; preds = %bb7 + br label %bb11 + +bb11: ; preds = %bb10, %bb11 + %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2] + %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1] + store i8 0, i8* %tmp13, align 1 + %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2] + %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1] + br i1 %tmp15, label %bb11, label %bb16 + +bb16: ; preds = %bb11 + br label %bb17 + +bb17: ; preds = %bb16, %bb7 + %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2] + %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1] + br i1 %tmp19, label %bb7, label %bb20 + +bb20: ; preds = %bb17 + br label %bb21 + +bb21: ; preds = %bb20, %entry + ret void +} diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll new file mode 100644 index 000000000000..932141d0448e --- /dev/null +++ b/test/CodeGen/X86/lsr-normalization.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 1 +; rdar://8168938 + +; This testcase involves SCEV normalization with the exit value from +; one loop involved with the increment value for an addrec on another +; loop. The expression should be properly normalized and simplified, +; and require only a single division. + +%0 = type { %0*, %0* } + +@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1] +@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1] + +define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind { +bb: + %tmp = alloca %0, align 8 ; <%0*> [#uses=11] + %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind + %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3] + store %0* %tmp, %0** %tmp3 + %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1] + store %0* %tmp, %0** %tmp4 + %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2] + %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2] + %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1] + br i1 %tmp7, label %bb10, label %bb8 + +bb8: ; preds = %bb + %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp9 + br label %bb10 + +bb10: ; preds = %bb8, %bb + %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1] + call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind + %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3] + %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1] + br i1 %tmp13, label %bb14, label %bb16 + +bb14: ; preds = %bb10 + %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0)) + br label %bb35 + +bb16: ; preds = %bb16, %bb10 + %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1] + %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1] + %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2] + %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1] + %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2] + br i1 %tmp21, label %bb23, label %bb16 + +bb23: ; preds = %bb16 + %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb25, %bb23 + %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1] + %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1] + %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2] + %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1] + %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2] + br i1 %tmp30, label %bb32, label %bb25 + +bb32: ; preds = %bb25 + %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1] + %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind + br label %bb35 + +bb35: ; preds = %bb32, %bb14 + %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2] + %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1] + br i1 %tmp37, label %bb44, label %bb38 + +bb38: ; preds = %bb38, %bb35 + %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2] + %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2] + %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1] + call void @_ZdlPv(i8* %tmp42) nounwind + %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1] + br i1 %tmp43, label %bb44, label %bb38 + +bb44: ; preds = %bb38, %bb35 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*) + +declare noalias i8* @_Znwm(i64) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +declare void @_ZdlPv(i8*) nounwind + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index b7e69b84bf84..d2ff58be1055 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry ; And the one at %bb68, where we want to be sure to use superhero mode: -; CHECK: BB10_10: +; CHECK: BB10_9: ; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}} ; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} ; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}} @@ -484,7 +484,7 @@ bb5: ; preds = %bb3, %entry ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $-16, %r{{.*}} -; CHECK-NEXT: BB10_11: +; CHECK-NEXT: BB10_10: ; CHECK-NEXT: cmpq $15, %r{{.*}} ; CHECK-NEXT: jg diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll new file mode 100644 index 000000000000..c9ed3e553a46 --- /dev/null +++ b/test/CodeGen/X86/lsr-static-addr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s + +; CHECK: xorl %eax, %eax +; CHECK: movsd .LCPI0_0(%rip), %xmm0 +; CHECK: align +; CHECK-NEXT: BB0_2: +; CHECK-NEXT: movsd A(,%rax,8) +; CHECK-NEXT: mulsd +; CHECK-NEXT: movsd +; CHECK-NEXT: incq %rax + +@A = external global [0 x double] + +define void @foo(i64 %n) nounwind { +entry: + %cmp5 = icmp sgt i64 %n, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: + %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06 + %tmp3 = load double* %arrayidx, align 8 + %mul = fmul double %tmp3, 2.300000e+00 + store double %mul, double* %arrayidx, align 8 + %inc = add nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll index ec8db501ef34..d605e4f14fe4 100644 --- a/test/CodeGen/X86/lsr-wrap.ll +++ b/test/CodeGen/X86/lsr-wrap.ll @@ -3,7 +3,7 @@ ; LSR would like to use a single IV for both of these, however it's ; not safe due to wraparound. -; CHECK: addb $-4, %r +; CHECK: addb $-4, % ; CHECK: decw % @g_19 = common global i32 0 ; <i32*> [#uses=2] diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll deleted file mode 100644 index 796ef7a29e49..000000000000 --- a/test/CodeGen/X86/narrow_op-2.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s - - %struct.bf = type { i64, i16, i16, i32 } -@bfi = external global %struct.bf* - -define void @t1() nounwind ssp { -entry: - -; CHECK: andb $-2, 10( -; CHECK: andb $-3, 10( - - %0 = load %struct.bf** @bfi, align 8 - %1 = getelementptr %struct.bf* %0, i64 0, i32 1 - %2 = bitcast i16* %1 to i32* - %3 = load i32* %2, align 1 - %4 = and i32 %3, -65537 - store i32 %4, i32* %2, align 1 - %5 = load %struct.bf** @bfi, align 8 - %6 = getelementptr %struct.bf* %5, i64 0, i32 1 - %7 = bitcast i16* %6 to i32* - %8 = load i32* %7, align 1 - %9 = and i32 %8, -131073 - store i32 %9, i32* %7, align 1 - ret void -} diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index 9f9f92115c79..8bed62488070 100644 --- a/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" -define i32 @foo(i32 %A, i32 %B, i32 %C) { +define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { entry: switch i32 %A, label %out [ i32 1, label %bb diff --git a/test/CodeGen/X86/pr7882.ll b/test/CodeGen/X86/pr7882.ll new file mode 100644 index 000000000000..88404dbe125e --- /dev/null +++ b/test/CodeGen/X86/pr7882.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \ +; RUN: | FileCheck %s +; make sure scheduler honors the flags clobber. PR 7882. + +define i32 @main(i32 %argc, i8** %argv) nounwind +{ +entry: +; CHECK: InlineAsm End +; CHECK: cmpl + %res = icmp slt i32 1, %argc + %tmp = call i32 asm sideeffect alignstack + "push $$0 + popf + mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res) + %ret = select i1 %res, i32 %tmp, i32 42 + ret i32 %ret +} diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll new file mode 100644 index 000000000000..10d489b9a8a6 --- /dev/null +++ b/test/CodeGen/X86/shl-anyext.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Codegen should be able to use a 32-bit shift instead of a 64-bit shift. +; CHECK: shll $16 + +define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind { +if.end523: ; preds = %if.end453 + %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1] + %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1] + %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1] + %or801 = or i64 %shl800, %a ; <i64> [#uses=1] + %or806 = or i64 %or801, %b ; <i64> [#uses=1] + %or811 = or i64 %or806, %c ; <i64> [#uses=1] + %or819 = or i64 %or811, %d ; <i64> [#uses=1] + %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1] + store i32 %conv820, i32* %p + ret void +} + +; CHECK: foo: + +declare void @bar(i64) + +define fastcc void @foo(i32 %t) { +bb: + %tmp = add i32 %t, -1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2] + %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1] + %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1] + %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1] + %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1] + %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1] + %tmp8 = and i64 %tmp7, 4294967288 ; <i64> [#uses=1] + %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1] + call void @bar(i64 %tmp9) + ret void +} diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 4b27f2edb759..a3c9957be34e 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 +; Darwin 8 generates stubs, which don't match +; XFAIL: apple-darwin8 define void @t1(i32 %x) nounwind ssp { entry: diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index ebcdc655eeda..348121ac8bcf 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll new file mode 100644 index 000000000000..73f88aec643f --- /dev/null +++ b/test/CodeGen/X86/sse1.ll @@ -0,0 +1,45 @@ +; Tests for SSE1 and below, without SSE2+. +; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s + +define <8 x i16> @test1(<8 x i32> %a) nounwind { +; CHECK: test1 + ret <8 x i16> zeroinitializer +} + +define <8 x i16> @test2(<8 x i32> %a) nounwind { +; CHECK: test2 + %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1] + ret <8 x i16> %c +} + +; PR7993 +;define <4 x i32> @test3(<4 x i16> %a) nounwind { +; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1] +; ret <4 x i32> %c +;} + +; This should not emit shuffles to populate the top 2 elements of the 4-element +; vector that this ends up returning. +; rdar://8368414 +define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fsub float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; CHECK: test4: +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: unpcklps +; CHECK-NOT: shufps $16 +; CHECK: ret +} diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 20b8eac9c8d8..6fc019071f8b 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -1,14 +1,14 @@ ; Tests for SSE2 and below, without SSE3+. ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s -define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t1: +; CHECK: test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movlpd 12(%esp), %xmm0 @@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: ret } -define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t2: +; CHECK: test2: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 @@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } + + +define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind { + %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] + %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] + %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] + %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] + %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] + %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp13, <4 x float>* %res + ret void +; CHECK: @test3 +; CHECK: unpcklps +} + +define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { + %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp5, <4 x float>* %res + ret void +; CHECK: @test4 +; CHECK: pshufd $50, %xmm0, %xmm0 +} + +define <4 x i32> @test5(i8** %ptr) nounwind { +; CHECK: test5: +; CHECK: pxor +; CHECK: punpcklbw +; CHECK: punpcklwd + + %tmp = load i8** %ptr ; <i8*> [#uses=1] + %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] + %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] + %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] + %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp36 +} + +define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp2, <4 x float>* %res + ret void + +; CHECK: test6: +; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps %xmm0, (%eax) +} + +define void @test7() nounwind { + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] + store <4 x float> %2, <4 x float>* null + ret void + +; CHECK: test7: +; CHECK: pxor %xmm0, %xmm0 +; CHECK: movaps %xmm0, 0 +} + +@x = external global [4 x i32] + +define <2 x i64> @test8() nounwind { + %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] + %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] + %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] + %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] + %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] + %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp16 +; CHECK: test8: +; CHECK: movups (%eax), %xmm0 +} + +define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test9: +; CHECK: movups 8(%esp), %xmm0 +} + +define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test10: +; CHECK: movaps 4(%esp), %xmm0 +} + +define <2 x double> @test11(double %a, double %b) nounwind { + %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] + %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp7 +; CHECK: test11: +; CHECK: movapd 4(%esp), %xmm0 +} + +define void @test12() nounwind { + %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4, <4 x float>* null + ret void +; CHECK: test12: +; CHECK: movhlps +; CHECK: shufps +} + +define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] + %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp11, <4 x float>* %res + ret void +; CHECK: test13 +; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: pshufd $-40, %xmm0, %xmm0 +} + +define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] + %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] + %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp27 +; CHECK: test14: +; CHECK: addps %xmm1, %xmm0 +; CHECK: subps %xmm1, %xmm2 +; CHECK: movlhps %xmm2, %xmm0 +} + +define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind { +entry: + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp4 +; CHECK: test15: +; CHECK: movhlps %xmm1, %xmm0 +} diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index ef66d1a44a18..3a14fa26300c 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +; This used to compile to insertps $0 + insertps $16. insertps $0 is always +; pointless. +define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fadd float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; X32: buildvector: +; X32-NOT: insertps $0 +; X32: insertps $16 +; X32-NOT: insertps $0 +; X32: ret +; X64: buildvector: +; X64-NOT: insertps $0 +; X64: insertps $16 +; X64-NOT: insertps $0 +; X64: ret +} + diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll deleted file mode 100644 index 001a54096408..000000000000 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ /dev/null @@ -1,361 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep asm-printer %t | grep 166 -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5 - - type { [62 x %struct.Bitvec*] } ; type %0 - type { i8* } ; type %1 - type { double } ; type %2 - %struct..5sPragmaType = type { i8*, i32 } - %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } - %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* } - %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } - %struct.AuxData = type { i8*, void (i8*)* } - %struct.Bitvec = type { i32, i32, i32, %0 } - %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } - %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* } - %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } - %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } - %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } - %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } - %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } - %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } - %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } - %struct.Context = type { i64, i32, %struct.Fifo } - %struct.CountCtx = type { i64 } - %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } - %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } - %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* } - %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } - %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } - %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } - %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } - %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } - %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] } - %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } - %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } - %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } - %struct.IdList = type { %struct..5sPragmaType*, i32, i32 } - %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } - %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } - %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } - %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* } - %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } - %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 } - %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] } - %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* } - %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* } - %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } - %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } - %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } - %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } - %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } - %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } - %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } - %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } - %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } - %struct._OvflCell = type { i8*, i16 } - %struct._ht = type { i32, %struct.HashElem* } - %struct.sColMap = type { i32, i8* } - %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } - %struct.sqlite3InitInfo = type { i32, i32, i8 } - %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } - %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } - %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } - %struct.sqlite3_index_constraint_usage = type { i32, i8 } - %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double } - %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } - %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } - %struct.sqlite3_mutex = type opaque - %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } - %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } - %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } -@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp { -entry: - %0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18 ; <i8**> [#uses=1] - %1 = load i8** %0, align 8 ; <i8*> [#uses=34] - %2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12 ; <i16*> [#uses=1] - %3 = load i16* %2, align 2 ; <i16> [#uses=1] - %4 = zext i16 %3 to i32 ; <i32> [#uses=2] - %5 = shl i32 %idx, 1 ; <i32> [#uses=2] - %6 = add i32 %4, %5 ; <i32> [#uses=1] - %7 = sext i32 %6 to i64 ; <i64> [#uses=2] - %8 = getelementptr i8* %1, i64 %7 ; <i8*> [#uses=1] - %9 = load i8* %8, align 1 ; <i8> [#uses=2] - %10 = zext i8 %9 to i32 ; <i32> [#uses=1] - %11 = shl i32 %10, 8 ; <i32> [#uses=1] - %.sum3 = add i64 %7, 1 ; <i64> [#uses=1] - %12 = getelementptr i8* %1, i64 %.sum3 ; <i8*> [#uses=1] - %13 = load i8* %12, align 1 ; <i8> [#uses=2] - %14 = zext i8 %13 to i32 ; <i32> [#uses=1] - %15 = or i32 %11, %14 ; <i32> [#uses=3] - %16 = icmp slt i32 %sz, 4 ; <i1> [#uses=1] - %size_addr.0.i = select i1 %16, i32 4, i32 %sz ; <i32> [#uses=3] - %17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8 ; <i8*> [#uses=5] - %18 = load i8* %17, align 8 ; <i8> [#uses=1] - %19 = zext i8 %18 to i32 ; <i32> [#uses=4] - %20 = add i32 %19, 1 ; <i32> [#uses=2] - br label %bb3.i - -bb3.i: ; preds = %bb3.i, %entry - %addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ] ; <i32> [#uses=1] - %21 = sext i32 %addr.0.i to i64 ; <i64> [#uses=2] - %22 = getelementptr i8* %1, i64 %21 ; <i8*> [#uses=2] - %23 = load i8* %22, align 1 ; <i8> [#uses=2] - %24 = zext i8 %23 to i32 ; <i32> [#uses=1] - %25 = shl i32 %24, 8 ; <i32> [#uses=1] - %.sum34.i = add i64 %21, 1 ; <i64> [#uses=1] - %26 = getelementptr i8* %1, i64 %.sum34.i ; <i8*> [#uses=2] - %27 = load i8* %26, align 1 ; <i8> [#uses=2] - %28 = zext i8 %27 to i32 ; <i32> [#uses=1] - %29 = or i32 %25, %28 ; <i32> [#uses=3] - %.not.i = icmp uge i32 %29, %15 ; <i1> [#uses=1] - %30 = icmp eq i32 %29, 0 ; <i1> [#uses=1] - %or.cond.i = or i1 %30, %.not.i ; <i1> [#uses=1] - br i1 %or.cond.i, label %bb5.i, label %bb3.i - -bb5.i: ; preds = %bb3.i - store i8 %9, i8* %22, align 1 - store i8 %13, i8* %26, align 1 - %31 = zext i32 %15 to i64 ; <i64> [#uses=2] - %32 = getelementptr i8* %1, i64 %31 ; <i8*> [#uses=1] - store i8 %23, i8* %32, align 1 - %.sum32.i = add i64 %31, 1 ; <i64> [#uses=1] - %33 = getelementptr i8* %1, i64 %.sum32.i ; <i8*> [#uses=1] - store i8 %27, i8* %33, align 1 - %34 = add i32 %15, 2 ; <i32> [#uses=1] - %35 = zext i32 %34 to i64 ; <i64> [#uses=2] - %36 = getelementptr i8* %1, i64 %35 ; <i8*> [#uses=1] - %37 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1] - %38 = trunc i32 %37 to i8 ; <i8> [#uses=1] - store i8 %38, i8* %36, align 1 - %39 = trunc i32 %size_addr.0.i to i8 ; <i8> [#uses=1] - %.sum31.i = add i64 %35, 1 ; <i64> [#uses=1] - %40 = getelementptr i8* %1, i64 %.sum31.i ; <i8*> [#uses=1] - store i8 %39, i8* %40, align 1 - %41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14 ; <i16*> [#uses=4] - %42 = load i16* %41, align 2 ; <i16> [#uses=1] - %43 = trunc i32 %size_addr.0.i to i16 ; <i16> [#uses=1] - %44 = add i16 %42, %43 ; <i16> [#uses=1] - store i16 %44, i16* %41, align 2 - %45 = load i8* %17, align 8 ; <i8> [#uses=1] - %46 = zext i8 %45 to i32 ; <i32> [#uses=1] - %47 = add i32 %46, 1 ; <i32> [#uses=1] - br label %bb11.outer.i - -bb11.outer.i: ; preds = %bb6.i, %bb5.i - %addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ] ; <i32> [#uses=1] - %48 = sext i32 %addr.1.ph.i to i64 ; <i64> [#uses=2] - %49 = getelementptr i8* %1, i64 %48 ; <i8*> [#uses=1] - %.sum30.i = add i64 %48, 1 ; <i64> [#uses=1] - %50 = getelementptr i8* %1, i64 %.sum30.i ; <i8*> [#uses=1] - br label %bb11.i - -bb6.i: ; preds = %bb11.i - %51 = zext i32 %111 to i64 ; <i64> [#uses=2] - %52 = getelementptr i8* %1, i64 %51 ; <i8*> [#uses=2] - %53 = load i8* %52, align 1 ; <i8> [#uses=1] - %54 = zext i8 %53 to i32 ; <i32> [#uses=1] - %55 = shl i32 %54, 8 ; <i32> [#uses=1] - %.sum24.i = add i64 %51, 1 ; <i64> [#uses=1] - %56 = getelementptr i8* %1, i64 %.sum24.i ; <i8*> [#uses=2] - %57 = load i8* %56, align 1 ; <i8> [#uses=3] - %58 = zext i8 %57 to i32 ; <i32> [#uses=1] - %59 = or i32 %55, %58 ; <i32> [#uses=5] - %60 = add i32 %111, 2 ; <i32> [#uses=1] - %61 = zext i32 %60 to i64 ; <i64> [#uses=2] - %62 = getelementptr i8* %1, i64 %61 ; <i8*> [#uses=2] - %63 = load i8* %62, align 1 ; <i8> [#uses=1] - %64 = zext i8 %63 to i32 ; <i32> [#uses=1] - %65 = shl i32 %64, 8 ; <i32> [#uses=1] - %.sum23.i = add i64 %61, 1 ; <i64> [#uses=1] - %66 = getelementptr i8* %1, i64 %.sum23.i ; <i8*> [#uses=2] - %67 = load i8* %66, align 1 ; <i8> [#uses=2] - %68 = zext i8 %67 to i32 ; <i32> [#uses=1] - %69 = or i32 %65, %68 ; <i32> [#uses=1] - %70 = add i32 %111, 3 ; <i32> [#uses=1] - %71 = add i32 %70, %69 ; <i32> [#uses=1] - %72 = icmp sge i32 %71, %59 ; <i1> [#uses=1] - %73 = icmp ne i32 %59, 0 ; <i1> [#uses=1] - %74 = and i1 %72, %73 ; <i1> [#uses=1] - br i1 %74, label %bb9.i, label %bb11.outer.i - -bb9.i: ; preds = %bb6.i - %75 = load i8* %17, align 8 ; <i8> [#uses=1] - %76 = zext i8 %75 to i32 ; <i32> [#uses=1] - %77 = add i32 %76, 7 ; <i32> [#uses=1] - %78 = zext i32 %77 to i64 ; <i64> [#uses=1] - %79 = getelementptr i8* %1, i64 %78 ; <i8*> [#uses=2] - %80 = load i8* %79, align 1 ; <i8> [#uses=1] - %81 = sub i8 %109, %57 ; <i8> [#uses=1] - %82 = add i8 %81, %67 ; <i8> [#uses=1] - %83 = add i8 %82, %80 ; <i8> [#uses=1] - store i8 %83, i8* %79, align 1 - %84 = zext i32 %59 to i64 ; <i64> [#uses=2] - %85 = getelementptr i8* %1, i64 %84 ; <i8*> [#uses=1] - %86 = load i8* %85, align 1 ; <i8> [#uses=1] - store i8 %86, i8* %52, align 1 - %.sum22.i = add i64 %84, 1 ; <i64> [#uses=1] - %87 = getelementptr i8* %1, i64 %.sum22.i ; <i8*> [#uses=1] - %88 = load i8* %87, align 1 ; <i8> [#uses=1] - store i8 %88, i8* %56, align 1 - %89 = add i32 %59, 2 ; <i32> [#uses=1] - %90 = zext i32 %89 to i64 ; <i64> [#uses=2] - %91 = getelementptr i8* %1, i64 %90 ; <i8*> [#uses=1] - %92 = load i8* %91, align 1 ; <i8> [#uses=1] - %93 = zext i8 %92 to i32 ; <i32> [#uses=1] - %94 = shl i32 %93, 8 ; <i32> [#uses=1] - %.sum20.i = add i64 %90, 1 ; <i64> [#uses=1] - %95 = getelementptr i8* %1, i64 %.sum20.i ; <i8*> [#uses=2] - %96 = load i8* %95, align 1 ; <i8> [#uses=1] - %97 = zext i8 %96 to i32 ; <i32> [#uses=1] - %98 = or i32 %94, %97 ; <i32> [#uses=1] - %99 = sub i32 %59, %111 ; <i32> [#uses=1] - %100 = add i32 %99, %98 ; <i32> [#uses=1] - %101 = lshr i32 %100, 8 ; <i32> [#uses=1] - %102 = trunc i32 %101 to i8 ; <i8> [#uses=1] - store i8 %102, i8* %62, align 1 - %103 = load i8* %95, align 1 ; <i8> [#uses=1] - %104 = sub i8 %57, %109 ; <i8> [#uses=1] - %105 = add i8 %104, %103 ; <i8> [#uses=1] - store i8 %105, i8* %66, align 1 - br label %bb11.i - -bb11.i: ; preds = %bb9.i, %bb11.outer.i - %106 = load i8* %49, align 1 ; <i8> [#uses=1] - %107 = zext i8 %106 to i32 ; <i32> [#uses=1] - %108 = shl i32 %107, 8 ; <i32> [#uses=1] - %109 = load i8* %50, align 1 ; <i8> [#uses=3] - %110 = zext i8 %109 to i32 ; <i32> [#uses=1] - %111 = or i32 %108, %110 ; <i32> [#uses=6] - %112 = icmp eq i32 %111, 0 ; <i1> [#uses=1] - br i1 %112, label %bb12.i, label %bb6.i - -bb12.i: ; preds = %bb11.i - %113 = zext i32 %20 to i64 ; <i64> [#uses=2] - %114 = getelementptr i8* %1, i64 %113 ; <i8*> [#uses=2] - %115 = load i8* %114, align 1 ; <i8> [#uses=2] - %116 = add i32 %19, 5 ; <i32> [#uses=1] - %117 = zext i32 %116 to i64 ; <i64> [#uses=2] - %118 = getelementptr i8* %1, i64 %117 ; <i8*> [#uses=3] - %119 = load i8* %118, align 1 ; <i8> [#uses=1] - %120 = icmp eq i8 %115, %119 ; <i1> [#uses=1] - br i1 %120, label %bb13.i, label %bb1.preheader - -bb13.i: ; preds = %bb12.i - %121 = add i32 %19, 2 ; <i32> [#uses=1] - %122 = zext i32 %121 to i64 ; <i64> [#uses=1] - %123 = getelementptr i8* %1, i64 %122 ; <i8*> [#uses=1] - %124 = load i8* %123, align 1 ; <i8> [#uses=1] - %125 = add i32 %19, 6 ; <i32> [#uses=1] - %126 = zext i32 %125 to i64 ; <i64> [#uses=1] - %127 = getelementptr i8* %1, i64 %126 ; <i8*> [#uses=1] - %128 = load i8* %127, align 1 ; <i8> [#uses=1] - %129 = icmp eq i8 %124, %128 ; <i1> [#uses=1] - br i1 %129, label %bb14.i, label %bb1.preheader - -bb14.i: ; preds = %bb13.i - %130 = zext i8 %115 to i32 ; <i32> [#uses=1] - %131 = shl i32 %130, 8 ; <i32> [#uses=1] - %.sum29.i = add i64 %113, 1 ; <i64> [#uses=1] - %132 = getelementptr i8* %1, i64 %.sum29.i ; <i8*> [#uses=1] - %133 = load i8* %132, align 1 ; <i8> [#uses=1] - %134 = zext i8 %133 to i32 ; <i32> [#uses=1] - %135 = or i32 %134, %131 ; <i32> [#uses=2] - %136 = zext i32 %135 to i64 ; <i64> [#uses=1] - %137 = getelementptr i8* %1, i64 %136 ; <i8*> [#uses=1] - %138 = bitcast i8* %137 to i16* ; <i16*> [#uses=1] - %139 = bitcast i8* %114 to i16* ; <i16*> [#uses=1] - %tmp.i = load i16* %138, align 1 ; <i16> [#uses=1] - store i16 %tmp.i, i16* %139, align 1 - %140 = load i8* %118, align 1 ; <i8> [#uses=1] - %141 = zext i8 %140 to i32 ; <i32> [#uses=1] - %142 = shl i32 %141, 8 ; <i32> [#uses=1] - %.sum28.i = add i64 %117, 1 ; <i64> [#uses=1] - %143 = getelementptr i8* %1, i64 %.sum28.i ; <i8*> [#uses=2] - %144 = load i8* %143, align 1 ; <i8> [#uses=2] - %145 = zext i8 %144 to i32 ; <i32> [#uses=1] - %146 = or i32 %142, %145 ; <i32> [#uses=1] - %147 = add i32 %135, 2 ; <i32> [#uses=1] - %148 = zext i32 %147 to i64 ; <i64> [#uses=2] - %149 = getelementptr i8* %1, i64 %148 ; <i8*> [#uses=1] - %150 = load i8* %149, align 1 ; <i8> [#uses=1] - %151 = zext i8 %150 to i32 ; <i32> [#uses=1] - %152 = shl i32 %151, 8 ; <i32> [#uses=1] - %.sum27.i = add i64 %148, 1 ; <i64> [#uses=1] - %153 = getelementptr i8* %1, i64 %.sum27.i ; <i8*> [#uses=2] - %154 = load i8* %153, align 1 ; <i8> [#uses=1] - %155 = zext i8 %154 to i32 ; <i32> [#uses=1] - %156 = or i32 %152, %155 ; <i32> [#uses=1] - %157 = add i32 %156, %146 ; <i32> [#uses=1] - %158 = lshr i32 %157, 8 ; <i32> [#uses=1] - %159 = trunc i32 %158 to i8 ; <i8> [#uses=1] - store i8 %159, i8* %118, align 1 - %160 = load i8* %153, align 1 ; <i8> [#uses=1] - %161 = add i8 %160, %144 ; <i8> [#uses=1] - store i8 %161, i8* %143, align 1 - br label %bb1.preheader - -bb1.preheader: ; preds = %bb14.i, %bb13.i, %bb12.i - %i.08 = add i32 %idx, 1 ; <i32> [#uses=2] - %162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15 ; <i16*> [#uses=4] - %163 = load i16* %162, align 4 ; <i16> [#uses=2] - %164 = zext i16 %163 to i32 ; <i32> [#uses=1] - %165 = icmp sgt i32 %164, %i.08 ; <i1> [#uses=1] - br i1 %165, label %bb, label %bb2 - -bb: ; preds = %bb, %bb1.preheader - %indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ] ; <i64> [#uses=3] - %tmp16 = add i32 %5, %4 ; <i32> [#uses=1] - %tmp.17 = sext i32 %tmp16 to i64 ; <i64> [#uses=1] - %tmp19 = shl i64 %indvar, 1 ; <i64> [#uses=1] - %ctg2.sum = add i64 %tmp.17, %tmp19 ; <i64> [#uses=4] - %ctg229 = getelementptr i8* %1, i64 %ctg2.sum ; <i8*> [#uses=1] - %ctg229.sum31 = add i64 %ctg2.sum, 2 ; <i64> [#uses=1] - %166 = getelementptr i8* %1, i64 %ctg229.sum31 ; <i8*> [#uses=1] - %167 = load i8* %166, align 1 ; <i8> [#uses=1] - store i8 %167, i8* %ctg229 - %ctg229.sum30 = add i64 %ctg2.sum, 3 ; <i64> [#uses=1] - %168 = getelementptr i8* %1, i64 %ctg229.sum30 ; <i8*> [#uses=1] - %169 = load i8* %168, align 1 ; <i8> [#uses=1] - %ctg229.sum = add i64 %ctg2.sum, 1 ; <i64> [#uses=1] - %170 = getelementptr i8* %1, i64 %ctg229.sum ; <i8*> [#uses=1] - store i8 %169, i8* %170, align 1 - %indvar15 = trunc i64 %indvar to i32 ; <i32> [#uses=1] - %i.09 = add i32 %indvar15, %i.08 ; <i32> [#uses=1] - %i.0 = add i32 %i.09, 1 ; <i32> [#uses=1] - %171 = load i16* %162, align 4 ; <i16> [#uses=2] - %172 = zext i16 %171 to i32 ; <i32> [#uses=1] - %173 = icmp sgt i32 %172, %i.0 ; <i1> [#uses=1] - %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] - br i1 %173, label %bb, label %bb2 - -bb2: ; preds = %bb, %bb1.preheader - %174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ] ; <i16> [#uses=1] - %175 = add i16 %174, -1 ; <i16> [#uses=2] - store i16 %175, i16* %162, align 4 - %176 = load i8* %17, align 8 ; <i8> [#uses=1] - %177 = zext i8 %176 to i32 ; <i32> [#uses=1] - %178 = add i32 %177, 3 ; <i32> [#uses=1] - %179 = zext i32 %178 to i64 ; <i64> [#uses=1] - %180 = getelementptr i8* %1, i64 %179 ; <i8*> [#uses=1] - %181 = lshr i16 %175, 8 ; <i16> [#uses=1] - %182 = trunc i16 %181 to i8 ; <i8> [#uses=1] - store i8 %182, i8* %180, align 1 - %183 = load i8* %17, align 8 ; <i8> [#uses=1] - %184 = zext i8 %183 to i32 ; <i32> [#uses=1] - %185 = add i32 %184, 3 ; <i32> [#uses=1] - %186 = zext i32 %185 to i64 ; <i64> [#uses=1] - %187 = load i16* %162, align 4 ; <i16> [#uses=1] - %188 = trunc i16 %187 to i8 ; <i8> [#uses=1] - %.sum = add i64 %186, 1 ; <i64> [#uses=1] - %189 = getelementptr i8* %1, i64 %.sum ; <i8*> [#uses=1] - store i8 %188, i8* %189, align 1 - %190 = load i16* %41, align 2 ; <i16> [#uses=1] - %191 = add i16 %190, 2 ; <i16> [#uses=1] - store i16 %191, i16* %41, align 2 - %192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1] - store i8 1, i8* %192, align 1 - ret void -} diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll index 70204bcf4745..a7c2517e7dbe 100644 --- a/test/CodeGen/X86/stdcall.ll +++ b/test/CodeGen/X86/stdcall.ll @@ -2,7 +2,7 @@ ; PR5851 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" %0 = type { void (...)* } diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index 5682e7caf8bd..abc5174c98de 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -1,6 +1,6 @@ ; rdar://7860110 -; RUN: llc < %s | FileCheck %s -check-prefix=X64 -; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32 +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64 +; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.2" @@ -125,3 +125,30 @@ entry: ; X32: movb %cl, 5(%{{.*}}) } +; PR7833 + +@g_16 = internal global i32 -1 + +; X64: test8: +; X64-NEXT: movl _g_16(%rip), %eax +; X64-NEXT: movl $0, _g_16(%rip) +; X64-NEXT: orl $1, %eax +; X64-NEXT: movl %eax, _g_16(%rip) +; X64-NEXT: ret +define void @test8() nounwind { + %tmp = load i32* @g_16 + store i32 0, i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} + +; X64: test9: +; X64-NEXT: orb $1, _g_16(%rip) +; X64-NEXT: ret +define void @test9() nounwind { + %tmp = load i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll index d54fb4115b07..7f92af4dca9f 100644 --- a/test/CodeGen/X86/tailcall-fastisel.ll +++ b/test/CodeGen/X86/tailcall-fastisel.ll @@ -1,8 +1,6 @@ -; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL +; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL -; Fast-isel shouldn't attempt to handle this tail call, and it should -; cleanly terminate instruction selection in the block after it's -; done to avoid emitting invalid MachineInstrs. +; Fast-isel shouldn't attempt to cope with tail calls. %0 = type { i64, i32, i8* } @@ -11,3 +9,11 @@ fail: ; preds = %entry %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1] ret i8* %tmp20 } + +define i32 @foo() nounwind { +entry: + %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @bar(...) nounwind diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index 4c37225ce027..6f6d6f2cd967 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -3,7 +3,7 @@ @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] -define i32 @main() nounwind { +define i32 @foo() nounwind { bb1.thread: br label %bb1 diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll index 9c4b773a6190..76c3fdfc060c 100644 --- a/test/CodeGen/X86/v2f32.ll +++ b/test/CodeGen/X86/v2f32.ll @@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind { store float %c, float* %P2 ret void ; X64: test1: -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) ; X64-NEXT: ret ; X32: test1: -; X32-NEXT: movss 4(%esp), %xmm0 -; X32-NEXT: addss 8(%esp), %xmm0 -; X32-NEXT: movl 12(%esp), %eax -; X32-NEXT: movss %xmm0, (%eax) +; X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) ; X32-NEXT: ret } @@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw ret <2 x float> %Z ; X64: test2: -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: addps -; X64-NEXT: movaps -; X64-NEXT: pshufd +; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: ret } + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; CHECK: test3: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; CHECK: test4: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x float> %E + +; CHECK: _test5: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + + diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll index 6f18d13cc9d3..f8531646effa 100644 --- a/test/CodeGen/X86/vec_cast.ll +++ b/test/CodeGen/X86/vec_cast.ll @@ -1,15 +1,16 @@ -; RUN: llc < %s -march=x86-64 -; RUN: llc < %s -march=x86-64 -disable-mmx +; RUN: llc < %s -march=x86-64 -mcpu=core2 +; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx + define <8 x i32> @a(<8 x i16> %a) nounwind { %c = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %c } -define <3 x i32> @b(<3 x i16> %a) nounwind { - %c = sext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x i32> @b(<3 x i16> %a) nounwind { +; %c = sext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @c(<1 x i16> %a) nounwind { %c = sext <1 x i16> %a to <1 x i32> @@ -21,10 +22,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwind { ret <8 x i32> %c } -define <3 x i32> @e(<3 x i16> %a) nounwind { - %c = zext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x i32> @e(<3 x i16> %a) nounwind { +; %c = zext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @f(<1 x i16> %a) nounwind { %c = zext <1 x i16> %a to <1 x i32> diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll index 54aa43f0c35d..de3b36ff126c 100644 --- a/test/CodeGen/X86/vec_insert-6.ll +++ b/test/CodeGen/X86/vec_insert-6.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 define <4 x float> @t3(<4 x float>* %P) nounwind { %tmp1 = load <4 x float>* %P diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll index 2e829df1f8df..e5a7ccc5ef94 100644 --- a/test/CodeGen/X86/vec_insert-9.ll +++ b/test/CodeGen/X86/vec_insert-9.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t -; RUN: grep pinsrd %t | count 2 +; RUN: grep pinsrd %t | count 1 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll new file mode 100644 index 000000000000..9ef7fbdb0c50 --- /dev/null +++ b/test/CodeGen/X86/vec_shift4.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s + +define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shll +; CHECK: pslld +; CHECK: paddd +; CHECK: cvttps2dq +; CHECK: pmulld + + %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1] + %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} + +define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shlb +; CHECK: pblendvb +; CHECK: pblendvb +; CHECK: pblendvb + %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1] + %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll deleted file mode 100644 index a63e3868ad75..000000000000 --- a/test/CodeGen/X86/vec_shuffle-10.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep unpcklps %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: not grep {sub.*esp} %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { - %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] - %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] - %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] - %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] - %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] - %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] - %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp13, <4 x float>* %res - ret void -} - -define void @test2(<4 x float> %X, <4 x float>* %res) { - %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp5, <4 x float>* %res - ret void -} diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll index 9fc09dfdd2b8..861a1cc5b93c 100644 --- a/test/CodeGen/X86/vec_shuffle-19.ll +++ b/test/CodeGen/X86/vec_shuffle-19.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 ; PR2485 define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll index 6d1bac0743d4..fc06b9514e43 100644 --- a/test/CodeGen/X86/vec_shuffle-20.ll +++ b/test/CodeGen/X86/vec_shuffle-20.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll index 7562f1d89594..1b104deb3055 100644 --- a/test/CodeGen/X86/vec_shuffle-24.ll +++ b/test/CodeGen/X86/vec_shuffle-24.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s define i32 @t() nounwind optsize { entry: +; CHECK: punpckldq %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5] volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll deleted file mode 100644 index f4930b084504..000000000000 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movlhps %t | count 1 -; RUN: grep movhlps %t | count 1 - -define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) { - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] - %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] - %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp27 -} - -define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) { -entry: - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] - %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] - %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp4 -} diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll new file mode 100644 index 000000000000..1ed858de64e8 --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { +entry: +; CHECK: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movlps (%rax), %xmm1 +; CHECK-NEXT: shufps $36, %xmm1, %xmm0 + %0 = load <4 x i32>* undef, align 16 + %1 = load <4 x i32>* %a0, align 16 + %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %2 +} + diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll deleted file mode 100644 index 829fedf97cc5..000000000000 --- a/test/CodeGen/X86/vec_shuffle-4.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep shuf %t | count 2 -; RUN: not grep unpck %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) { - %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] - %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] - %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp11, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll deleted file mode 100644 index c24167a6150d..000000000000 --- a/test/CodeGen/X86/vec_shuffle-5.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movhlps %t | count 1 -; RUN: grep shufps %t | count 1 - -define void @test() nounwind { - %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp4, <4 x float>* null - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll deleted file mode 100644 index 28fd59b29dd3..000000000000 --- a/test/CodeGen/X86/vec_shuffle-6.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movapd %t | count 1 -; RUN: grep movaps %t | count 1 -; RUN: grep movups %t | count 2 - -target triple = "i686-apple-darwin" -@x = external global [4 x i32] - -define <2 x i64> @test1() { - %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] - %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] - %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] - %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] - %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] - %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] - %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] - %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] - %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] - ret <2 x i64> %tmp16 -} - -define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <4 x float> @test3(float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <2 x double> @test4(double %a, double %b) { - %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] - %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] - ret <2 x double> %tmp7 -} diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll deleted file mode 100644 index 64bd6a3c83b8..000000000000 --- a/test/CodeGen/X86/vec_shuffle-7.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep pxor %t | count 1 -; RUN: not grep shufps %t - -define void @test() { - bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] - shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] - store <4 x float> %2, <4 x float>* null - unreachable -} - diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll deleted file mode 100644 index 964ce7b2892b..000000000000 --- a/test/CodeGen/X86/vec_shuffle-8.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | \ -; RUN: not grep shufps - -define void @test(<4 x float>* %res, <4 x float>* %A) { - %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp2, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll deleted file mode 100644 index 07195869b8cf..000000000000 --- a/test/CodeGen/X86/vec_shuffle-9.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s - -define <4 x i32> @test(i8** %ptr) { -; CHECK: pxor -; CHECK: punpcklbw -; CHECK: punpcklwd - - %tmp = load i8** %ptr ; <i8*> [#uses=1] - %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] - %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] - %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] - %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] - %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] - %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] - %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] - %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] - %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp36 -} diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 25dde57c767e..463f522a11df 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -3,7 +3,8 @@ ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 @@ -15,7 +16,8 @@ entry: ; widening shuffle v3float with a different mask and then a add define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf2: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> %val = fadd <3 x float> %x, %src2 @@ -26,7 +28,7 @@ entry: ; Example of when widening a v3float operation causes the DAG to replace a node ; with the operation that we are currently widening, i.e. when replacing ; opA with opB, the DAG will produce new operations with opA. -define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5> diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll new file mode 100644 index 000000000000..27d3358d4ac1 --- /dev/null +++ b/test/CodeGen/X86/win_chkstk.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 +; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX + +; Windows and mingw require a prologue helper routine if more than 4096 bytes area +; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca +; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer. +; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit +; prologue is responsible for adjusting the stack pointer. + +; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI. +define i32 @main4k() nounwind { +entry: +; WIN_X32: call __chkstk +; WIN_X64: call __chkstk +; MINGW_X32: call __alloca +; MINGW_X64: call _alloca +; LINUX-NOT: call __chkstk + %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] + ret i32 0 +} + +; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack +; allocation. +define i32 @main128() nounwind { +entry: +; WIN_X32: # BB#0: +; WIN_X32-NOT: call __chkstk +; WIN_X32: ret + +; WIN_X64: # BB#0: +; WIN_X64-NOT: call __chkstk +; WIN_X64: ret + +; MINGW_X64: # BB#0: +; MINGW_X64-NOT: call _alloca +; MINGW_X64: ret + +; LINUX: # BB#0: +; LINUX-NOT: call __chkstk +; LINUX: ret + %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0] + ret i32 0 +} diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll index 3e3bb95d06f7..447007439fbb 100644 --- a/test/CodeGen/X86/zero-remat.ll +++ b/test/CodeGen/X86/zero-remat.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 declare void @bar(double %x) diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll new file mode 100644 index 000000000000..87a4a8955a3e --- /dev/null +++ b/test/DebugInfo/2010-07-19-Crash.ll @@ -0,0 +1,24 @@ +; RUN: llc -o /dev/null < %s +; PR7662 +; Do not add variables to !11 because it is a declaration entry. + +define i32 @bar() nounwind readnone ssp { +entry: + ret i32 42, !dbg !9 +} + +!llvm.dbg.sp = !{!0, !6, !11} +!llvm.dbg.lv.foo = !{!7} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"one.c", metadata !".", metadata !"clang 2.8", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 524299, metadata !6, i32 7, i32 18} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 4, i32 3, metadata !10, null} +!10 = metadata !{i32 524299, metadata !0, i32 3, i32 11} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/DebugInfo/2010-08-04-StackVariable.ll new file mode 100644 index 000000000000..61cd20bb1ab3 --- /dev/null +++ b/test/DebugInfo/2010-08-04-StackVariable.ll @@ -0,0 +1,124 @@ +; RUN: llc -O0 < %s | grep DW_OP_fbreg +; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. + +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() nounwind ssp { +entry: + %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null} diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll index e19395b0df1f..319379197477 100644 --- a/test/DebugInfo/printdbginfo2.ll +++ b/test/DebugInfo/printdbginfo2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-dbginfo -disable-output | FileCheck %s +; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s ; grep {%b is variable b of type x declared at x.c:7} %t1 ; grep {%2 is variable b of type x declared at x.c:7} %t1 ; grep {@c.1442 is variable c of type int declared at x.c:4} %t1 diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll index 02a79f81cdf1..0c6bcd9abfe5 100644 --- a/test/Feature/NamedMDNode.ll +++ b/test/Feature/NamedMDNode.ll @@ -3,7 +3,7 @@ ;; Simple NamedMDNode !0 = metadata !{i32 42} !1 = metadata !{metadata !"foo"} -!llvm.stuff = !{!0, !1, null} +!llvm.stuff = !{!0, !1} !samename = !{!0, !1} declare void @samename() diff --git a/test/Feature/linker_private_linkages.ll b/test/Feature/linker_private_linkages.ll index 19bcbb40aa01..f9f290875645 100644 --- a/test/Feature/linker_private_linkages.ll +++ b/test/Feature/linker_private_linkages.ll @@ -4,3 +4,4 @@ @foo = linker_private hidden global i32 0 @bar = linker_private_weak hidden global i32 0 +@qux = linker_private_weak_def_auto global i32 0 diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll index d43815be46aa..9856b375495c 100644 --- a/test/Feature/metadata.ll +++ b/test/Feature/metadata.ll @@ -1,9 +1,11 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis ; PR7105 -define void @foo() { +define void @foo(i32 %x) { call void @llvm.zonk(metadata !1, i64 0, metadata !1) - ret void + store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{metadata !"hello"} + store i32 0, i32* null, !whatever !{i32 %x, metadata !"hello", metadata !1, metadata !{}, metadata !2} + ret void, !whatever !{i32 %x} } declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone diff --git a/test/Feature/unions.ll b/test/Feature/unions.ll deleted file mode 100644 index 3cf8c3ce0e97..000000000000 --- a/test/Feature/unions.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis > %t1.ll -; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll -; RUN: diff %t1.ll %t2.ll - -%union.anon = type union { i8, i32, float } - -@union1 = constant union { i32, i8 } { i32 4 } -@union2 = constant union { i32, i8 } insertvalue(union { i32, i8 } undef, i32 4, 0) -@union3 = common global %union.anon zeroinitializer, align 8 - -define void @"Unions" () { - ret void -} - diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp index a8eda77e4a72..e1cc81f40f79 100644 --- a/test/FrontendC++/2009-07-15-LineNumbers.cpp +++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp @@ -1,7 +1,7 @@ // This is a regression test on debug info to make sure that we can // print line numbers in asm. // RUN: %llvmgcc -S -O0 -g %s -o - | \ -// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep { 2009-07-15-LineNumbers.cpp:25$} +// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {2009-07-15-LineNumbers.cpp:25$} #include <stdlib.h> diff --git a/test/FrontendC++/2010-07-19-nowarn.cpp b/test/FrontendC++/2010-07-19-nowarn.cpp new file mode 100644 index 000000000000..8742bf152329 --- /dev/null +++ b/test/FrontendC++/2010-07-19-nowarn.cpp @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null +// This should not warn about unreferenced label. 8195660. +// XFAIL: * +// XTARGET: x86,i386,i686 + +void quarterAsm(int array[], int len) +{ + __asm + { + mov esi, array; + mov ecx, len; + shr ecx, 2; +loop: + movdqa xmm0, [esi]; + psrad xmm0, 2; + movdqa [esi], xmm0; + add esi, 16; + sub ecx, 1; + jnz loop; + } +} diff --git a/test/FrontendC++/2010-07-23-DeclLoc.cpp b/test/FrontendC++/2010-07-23-DeclLoc.cpp new file mode 100644 index 000000000000..c72de3b33623 --- /dev/null +++ b/test/FrontendC++/2010-07-23-DeclLoc.cpp @@ -0,0 +1,86 @@ +// RUN: %llvmgxx -emit-llvm -S -g %s -o - | FileCheck %s +// Require the template function declaration refer to the correct filename. +// First, locate the function decl in metadata, and pluck out the file handle: +// CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]], +// Second: Require that filehandle refer to the correct filename: +// CHECK: {{^!}}[[filehandle]] = metadata {{![{].*}} metadata !"decl_should_be_here.hpp", +typedef long unsigned int __darwin_size_t; +typedef __darwin_size_t size_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +namespace std { + template<typename _Tp> class auto_ptr { + _Tp* _M_ptr; + public: + typedef _Tp element_type; + auto_ptr(element_type* __p = 0) throw() : _M_ptr(__p) { } + element_type& operator*() const throw() { } + }; +} +class Pointer32 { +public: + typedef uint32_t ptr_t; + typedef uint32_t size_t; +}; +class Pointer64 { +public: + typedef uint64_t ptr_t; + typedef uint64_t size_t; +}; +class BigEndian {}; +class LittleEndian {}; +template <typename _SIZE, typename _ENDIANNESS> class SizeAndEndianness { +public: + typedef _SIZE SIZE; +}; +typedef SizeAndEndianness<Pointer32, LittleEndian> ISA32Little; +typedef SizeAndEndianness<Pointer32, BigEndian> ISA32Big; +typedef SizeAndEndianness<Pointer64, LittleEndian> ISA64Little; +typedef SizeAndEndianness<Pointer64, BigEndian> ISA64Big; +template <typename SIZE> class TRange { +protected: + typename SIZE::ptr_t _location; + typename SIZE::size_t _length; + TRange(typename SIZE::ptr_t location, typename SIZE::size_t length) : _location(location), _length(length) { } +}; +template <typename SIZE, typename T> class TRangeValue : public TRange<SIZE> { + T _value; +public: + TRangeValue(typename SIZE::ptr_t location, typename SIZE::size_t length, T value) : TRange<SIZE>(location, length), _value(value) {}; +}; +template <typename SIZE> class TAddressRelocator {}; +class CSCppSymbolOwner{}; +class CSCppSymbolOwnerData{}; +template <typename SIZE> class TRawSymbolOwnerData +{ + TRangeValue< SIZE, uint8_t* > _TEXT_text_section; + const char* _dsym_path; + uint32_t _dylib_current_version; + uint32_t _dylib_compatibility_version; +public: + TRawSymbolOwnerData() : + _TEXT_text_section(0, 0, __null), _dsym_path(__null), _dylib_current_version(0), _dylib_compatibility_version(0) {} +}; +template <typename SIZE_AND_ENDIANNESS> class TExtendedMachOHeader {}; +# 16 "decl_should_be_here.hpp" +template <typename SIZE_AND_ENDIANNESS> void extract_dwarf_data_from_header(TExtendedMachOHeader<SIZE_AND_ENDIANNESS>& header, + TRawSymbolOwnerData<typename SIZE_AND_ENDIANNESS::SIZE>& symbol_owner_data, + TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>* address_relocator) {} +struct CSCppSymbolOwnerHashFunctor { + size_t operator()(const CSCppSymbolOwner& symbol_owner) const { +# 97 "wrong_place_for_decl.cpp" + } +}; +template <typename SIZE_AND_ENDIANNESS> CSCppSymbolOwnerData* create_symbol_owner_data_arch_specific(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + typedef typename SIZE_AND_ENDIANNESS::SIZE SIZE; + std::auto_ptr< TRawSymbolOwnerData<SIZE> > data(new TRawSymbolOwnerData<SIZE>()); + std::auto_ptr< TExtendedMachOHeader<SIZE_AND_ENDIANNESS> > header; + extract_dwarf_data_from_header(*header, *data, (TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>*)__null); +} +CSCppSymbolOwnerData* create_symbol_owner_data2(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + create_symbol_owner_data_arch_specific< ISA32Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA32Big >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Big >(symbol_owner, dsym_path); +} diff --git a/test/FrontendC++/2010-08-31-ByValArg.cpp b/test/FrontendC++/2010-08-31-ByValArg.cpp new file mode 100644 index 000000000000..be0d354b1d98 --- /dev/null +++ b/test/FrontendC++/2010-08-31-ByValArg.cpp @@ -0,0 +1,53 @@ +// This regression test checks byval arguments' debug info. +// Radar 8367011 +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe +// RUN: echo {break get\nrun\np missing_arg.b} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RUN: grep {1 = 4242} + +// XTARGET: x86_64-apple-darwin + +class EVT { +public: + int a; + int b; + int c; +}; + +class VAL { +public: + int x; + int y; +}; +void foo(EVT e); +EVT bar(); + +void get(int *i, unsigned dl, VAL v, VAL *p, unsigned n, EVT missing_arg) { +//CHECK: .ascii "missing_arg" + EVT e = bar(); + if (dl == n) + foo(missing_arg); +} + + +EVT bar() { + EVT e; + return e; +} + +void foo(EVT e) {} + +int main(){ + VAL v; + EVT ma; + ma.a = 1; + ma.b = 4242; + ma.c = 3; + int i = 42; + get (&i, 1, v, &v, 2, ma); + return 0; +} + diff --git a/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/test/FrontendC/2008-03-24-BitField-And-Alloca.c index 291f036523a4..641bcf1dbeb9 100644 --- a/test/FrontendC/2008-03-24-BitField-And-Alloca.c +++ b/test/FrontendC/2008-03-24-BitField-And-Alloca.c @@ -1,5 +1,5 @@ // RUN: %llvmgcc -O2 -S %s -o - | not grep alloca -// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep store +// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep {store } enum { PP_C, diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c index 34abbe3e5c5c..12e91405d10f 100644 --- a/test/FrontendC/2010-05-18-asmsched.c +++ b/test/FrontendC/2010-05-18-asmsched.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -c -O3 -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s +// RUN: %llvmgcc %s -c -O3 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s // r9 used to be clobbered before its value was moved to r10. 7993104. void foo(int x, int y) { @@ -14,4 +14,4 @@ void foo(int x, int y) { lr9 = x; lr10 = foo; asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10)); -}
\ No newline at end of file +} diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c index 65fbdb83003b..1744ba84185d 100644 --- a/test/FrontendC/2010-07-14-overconservative-align.c +++ b/test/FrontendC/2010-07-14-overconservative-align.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -emit-llvm -m64 -S -o - | FileCheck %s +// RUN: %llvmgcc %s -emit-llvm -S -o - | FileCheck %s // PR 5995 struct s { int word; @@ -9,6 +9,6 @@ struct s { void func (struct s *s) { -// CHECK: load %struct.s** %s_addr, align 8 +// CHECK: load %struct.s** %s_addr, align {{[48]}} s->word = 0; } diff --git a/test/FrontendC/2010-07-14-ref-off-end.c b/test/FrontendC/2010-07-14-ref-off-end.c index 6ccd05b770e9..c7fdd95a7aa0 100644 --- a/test/FrontendC/2010-07-14-ref-off-end.c +++ b/test/FrontendC/2010-07-14-ref-off-end.c @@ -17,8 +17,8 @@ return(char)s->c; } main() { -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 struct T t; t.i=0xff; t.c=0xffff11; diff --git a/test/FrontendC/2010-07-27-MinNoFoldConst.c b/test/FrontendC/2010-07-27-MinNoFoldConst.c new file mode 100644 index 000000000000..7cd8b4c43764 --- /dev/null +++ b/test/FrontendC/2010-07-27-MinNoFoldConst.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | FileCheck %s +extern int printf(const char *, ...); +static void bad(unsigned int v1, unsigned int v2) { + printf("%u\n", 1631381461u * (((v2 - 1273463329u <= v1 - 1273463329u) ? v2 : v1) - 1273463329u) + 121322179u); +} +// Radar 8198362 +// GCC FE wants to convert the above to +// 1631381461u * MIN(v2 - 1273463329u, v1 - 1273463329u) +// and then to +// MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419) +// +// 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips +// this to 4047041419. This breaks the comparision implicit in the MIN(). +// Two multiply operations suggests the bad optimization is happening; +// one multiplication, after the MIN(), is correct. +// CHECK: mul +// CHECK-NOT: mul +// CHECK: ret diff --git a/test/FrontendC/2010-08-12-asm-aggr-arg.c b/test/FrontendC/2010-08-12-asm-aggr-arg.c new file mode 100644 index 000000000000..81ec14b28826 --- /dev/null +++ b/test/FrontendC/2010-08-12-asm-aggr-arg.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// Radar 8288710: A small aggregate can be passed as an integer. Make sure +// we don't get an error with "input constraint with a matching output +// constraint of incompatible type!" + +struct wrapper { + int i; +}; + +// CHECK: xyz +int test(int i) { + struct wrapper w; + w.i = i; + __asm__("xyz" : "=r" (w) : "0" (w)); + return w.i; +} diff --git a/test/FrontendC/asm-reg-var-local.c b/test/FrontendC/asm-reg-var-local.c new file mode 100644 index 000000000000..22bd43c076d2 --- /dev/null +++ b/test/FrontendC/asm-reg-var-local.c @@ -0,0 +1,32 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Exercise various use cases for local asm "register variables". +// XFAIL: * +// XTARGET: x86_64,i686,i386 + +int foo() { +// CHECK: %a = alloca i32 + + register int a asm("rsi")=5; +// CHECK: store i32 5, i32* %a, align 4 + + asm volatile("; %0 This asm defines rsi" : "=r"(a)); +// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi} +// CHECK: store i32 %asmtmp, i32* %a + + a = 42; +// CHECK: store i32 42, i32* %a, align 4 + + asm volatile("; %0 This asm uses rsi" : : "r"(a)); +// CHECK: %1 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind +// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2) + + return a; +// CHECK: %3 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind +// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: store i32 %4, i32* %0, align 4 +// CHECK: %5 = load i32* %0, align 4 +// CHECK: store i32 %5, i32* %retval, align 4 +} diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c index b9ec281f5677..764126e02184 100644 --- a/test/FrontendC/cstring-align.c +++ b/test/FrontendC/cstring-align.c @@ -1,6 +1,4 @@ -// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32 -// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64 -// XTARGET: darwin +// RUN: %llvmgcc %s -c -Os -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s extern void func(const char *, const char *); @@ -8,10 +6,6 @@ void long_function_name() { func("%s: the function name", __func__); } -// DARWIN64: .align 4 -// DARWIN64: ___func__. -// DARWIN64: .asciz "long_function_name" - -// DARWIN32: .align 4 -// DARWIN32: ___func__. -// DARWIN32: .asciz "long_function_name" +// CHECK: .align 4 +// CHECK: ___func__. +// CHECK: .asciz "long_function_name" diff --git a/test/FrontendC/misaligned-param.c b/test/FrontendC/misaligned-param.c new file mode 100644 index 000000000000..b4fcfe312f5a --- /dev/null +++ b/test/FrontendC/misaligned-param.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -m32 -S -o - | FileCheck %s +// Misaligned parameter must be memcpy'd to correctly aligned temporary. +// XFAIL: * +// XTARGET: i386-apple-darwin,i686-apple-darwin,x86_64-apple-darwin + +struct s { int x; long double y; }; +long double foo(struct s x, int i, struct s y) { +// CHECK: foo +// CHECK: %x_addr = alloca %struct.s, align 16 +// CHECK: %y_addr = alloca %struct.s, align 16 +// CHECK: memcpy +// CHECK: memcpy +// CHECK: bar + return bar(&x, &y); +} diff --git a/test/FrontendC/vla-1.c b/test/FrontendC/vla-1.c index 76f6c53c1e16..77f78a5e3af7 100644 --- a/test/FrontendC/vla-1.c +++ b/test/FrontendC/vla-1.c @@ -1,5 +1,6 @@ -// RUN: true -// %llvmgcc -std=gnu99 %s -S |& grep {error: "is greater than the stack alignment" } +// RUN: %llvmgcc_only -std=gnu99 %s -S |& grep {warning: alignment for} +// ppc does not support this feature, and gets a fatal error at runtime. +// XFAIL: powerpc int foo(int a) { diff --git a/test/FrontendC/vla-2.c b/test/FrontendC/vla-2.c new file mode 100644 index 000000000000..555cfc789250 --- /dev/null +++ b/test/FrontendC/vla-2.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16" + +extern void bar(int[]); + +void foo(int a) +{ + int var[a] __attribute__((__aligned__(16))); + bar(var); + return; +} diff --git a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm new file mode 100644 index 000000000000..298844e97b5d --- /dev/null +++ b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm @@ -0,0 +1,27 @@ +// RUN: not %llvmgcc %s -S -emit-llvm -o - |& FileCheck %s +// This tests for a specific diagnostic in LLVM-GCC. +// Clang compiles this correctly with no diagnostic, +// ergo this test will fail with a Clang-based front-end. +class TFENodeVector { +public: + TFENodeVector(const TFENodeVector& inNodeVector); + TFENodeVector(); +}; + +@interface TWindowHistoryEntry {} +@property (assign, nonatomic) TFENodeVector targetPath; +@end + +@implementation TWindowHistoryEntry +@synthesize targetPath; +- (void) initWithWindowController { + TWindowHistoryEntry* entry; + TFENodeVector newPath; + // CHECK: setting a C++ non-POD object value is not implemented +#ifdef __clang__ +#error setting a C++ non-POD object value is not implemented +#endif + entry.targetPath = newPath; + [entry setTargetPath:newPath]; +} +@end diff --git a/test/FrontendObjC++/2010-08-04-Template.mm b/test/FrontendObjC++/2010-08-04-Template.mm new file mode 100644 index 000000000000..d0383406d7e2 --- /dev/null +++ b/test/FrontendObjC++/2010-08-04-Template.mm @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TRunSoon { + template <class P1> static void Post() {} +}; + +@implementation TPrivsTableViewMainController +- (void) applyToEnclosed { + TRunSoon::Post<int>(); +} +@end diff --git a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm new file mode 100644 index 000000000000..b33d7307af49 --- /dev/null +++ b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TFENode { + TFENode(const TFENode& inNode); +}; + +@interface TIconViewController +- (const TFENode&) target; +@end + +void sortAllChildrenForNode(const TFENode&node); + +@implementation TIconViewController +- (void) setArrangeBy { + sortAllChildrenForNode(self.target); +} +@end diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m index 2c72e9532ceb..8ed7c24dc134 100644 --- a/test/FrontendObjC/2009-08-17-DebugInfo.m +++ b/test/FrontendObjC/2009-08-17-DebugInfo.m @@ -5,7 +5,7 @@ // RUN: %link %t.o -o %t.exe -framework Foundation // RUN: echo {break randomFunc\n} > %t.in // RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ -// RUN: grep {Breakpoint 1 at 0x.*: file 2009-08-17-DebugInfo.m, line 21} +// RUN: grep {Breakpoint 1 at 0x.*: file .*2009-08-17-DebugInfo.m, line 21} // XTARGET: darwin @interface MyClass { diff --git a/test/Integer/a15.ll b/test/Integer/a15.ll deleted file mode 100644 index 5c9dc3b1be0d..000000000000 --- a/test/Integer/a15.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 15 bits -; -@b = constant i15 add(i15 32767, i15 1) -@c = constant i15 add(i15 32767, i15 32767) -@d = constant i15 add(i15 32760, i15 8) -@e = constant i15 sub(i15 0 , i15 1) -@f = constant i15 sub(i15 0 , i15 32767) -@g = constant i15 sub(i15 2 , i15 32767) - -@h = constant i15 shl(i15 1 , i15 15) -@i = constant i15 shl(i15 1 , i15 14) -@j = constant i15 lshr(i15 32767 , i15 14) -@l = constant i15 ashr(i15 32767 , i15 14) - -@n = constant i15 mul(i15 32767, i15 2) -@q = constant i15 mul(i15 -16383,i15 -3) -@r = constant i15 sdiv(i15 -1, i15 16383) -@s = constant i15 udiv(i15 -1, i15 16383) -@t = constant i15 srem(i15 1, i15 32766) -@u = constant i15 urem(i15 32767,i15 -1) -@o = constant i15 trunc( i16 32768 to i15 ) -@p = constant i15 trunc( i16 32767 to i15 ) -@v = constant i15 srem(i15 -1, i15 768) - diff --git a/test/Integer/a15.ll.out b/test/Integer/a15.ll.out deleted file mode 100644 index 5195cdf3761f..000000000000 --- a/test/Integer/a15.ll.out +++ /dev/null @@ -1,21 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i15 0 ; <i15*> [#uses=0] -@c = constant i15 -2 ; <i15*> [#uses=0] -@d = constant i15 0 ; <i15*> [#uses=0] -@e = constant i15 -1 ; <i15*> [#uses=0] -@f = constant i15 1 ; <i15*> [#uses=0] -@g = constant i15 3 ; <i15*> [#uses=0] -@h = constant i15 undef ; <i15*> [#uses=0] -@i = constant i15 -16384 ; <i15*> [#uses=0] -@j = constant i15 1 ; <i15*> [#uses=0] -@l = constant i15 -1 ; <i15*> [#uses=0] -@n = constant i15 -2 ; <i15*> [#uses=0] -@q = constant i15 16381 ; <i15*> [#uses=0] -@r = constant i15 0 ; <i15*> [#uses=0] -@s = constant i15 2 ; <i15*> [#uses=0] -@t = constant i15 1 ; <i15*> [#uses=0] -@u = constant i15 0 ; <i15*> [#uses=0] -@o = constant i15 0 ; <i15*> [#uses=0] -@p = constant i15 -1 ; <i15*> [#uses=0] -@v = constant i15 -1 ; <i15*> [#uses=0] diff --git a/test/Integer/a17.ll b/test/Integer/a17.ll deleted file mode 100644 index db03e7c6be0d..000000000000 --- a/test/Integer/a17.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 17 bits -; -@b = constant i17 add(i17 131071, i17 1) -@c = constant i17 add(i17 131071, i17 131071) -@d = constant i17 add(i17 131064, i17 8) -@e = constant i17 sub(i17 0 , i17 1) -@f = constant i17 sub(i17 0 , i17 131071) -@g = constant i17 sub(i17 2 , i17 131071) - -@h = constant i17 shl(i17 1 , i17 17) -@i = constant i17 shl(i17 1 , i17 16) -@j = constant i17 lshr(i17 131071 , i17 16) -@l = constant i17 ashr(i17 131071 , i17 16) - -@n = constant i17 mul(i17 131071, i17 2) -@q = constant i17 sdiv(i17 -1, i17 65535) -@r = constant i17 udiv(i17 -1, i17 65535) -@s = constant i17 srem(i17 1, i17 131070) -@t = constant i17 urem(i17 131071,i17 -1) -@o = constant i17 trunc( i18 131072 to i17 ) -@p = constant i17 trunc( i18 131071 to i17 ) -@v = constant i17 srem(i17 -1, i17 15) diff --git a/test/Integer/a17.ll.out b/test/Integer/a17.ll.out deleted file mode 100644 index ba6641289e31..000000000000 --- a/test/Integer/a17.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i17 0 ; <i17*> [#uses=0] -@c = constant i17 -2 ; <i17*> [#uses=0] -@d = constant i17 0 ; <i17*> [#uses=0] -@e = constant i17 -1 ; <i17*> [#uses=0] -@f = constant i17 1 ; <i17*> [#uses=0] -@g = constant i17 3 ; <i17*> [#uses=0] -@h = constant i17 undef ; <i17*> [#uses=0] -@i = constant i17 -65536 ; <i17*> [#uses=0] -@j = constant i17 1 ; <i17*> [#uses=0] -@l = constant i17 -1 ; <i17*> [#uses=0] -@n = constant i17 -2 ; <i17*> [#uses=0] -@q = constant i17 0 ; <i17*> [#uses=0] -@r = constant i17 2 ; <i17*> [#uses=0] -@s = constant i17 1 ; <i17*> [#uses=0] -@t = constant i17 0 ; <i17*> [#uses=0] -@o = constant i17 0 ; <i17*> [#uses=0] -@p = constant i17 -1 ; <i17*> [#uses=0] -@v = constant i17 -1 ; <i17*> [#uses=0] diff --git a/test/Integer/a31.ll b/test/Integer/a31.ll deleted file mode 100644 index c0c571f63068..000000000000 --- a/test/Integer/a31.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 31 bits -; -@b = constant i31 add(i31 2147483647, i31 1) -@c = constant i31 add(i31 2147483647, i31 2147483647) -@d = constant i31 add(i31 2147483640, i31 8) -@e = constant i31 sub(i31 0 , i31 1) -@f = constant i31 sub(i31 0 , i31 2147483647) -@g = constant i31 sub(i31 2 , i31 2147483647) - -@h = constant i31 shl(i31 1 , i31 31) -@i = constant i31 shl(i31 1 , i31 30) -@j = constant i31 lshr(i31 2147483647 , i31 30) -@l = constant i31 ashr(i31 2147483647 , i31 30) - -@n = constant i31 mul(i31 2147483647, i31 2) -@q = constant i31 sdiv(i31 -1, i31 1073741823) -@r = constant i31 udiv(i31 -1, i31 1073741823) -@s = constant i31 srem(i31 1, i31 2147483646) -@t = constant i31 urem(i31 2147483647,i31 -1) -@o = constant i31 trunc( i32 2147483648 to i31 ) -@p = constant i31 trunc( i32 2147483647 to i31 ) -@u = constant i31 srem(i31 -3, i31 17) diff --git a/test/Integer/a31.ll.out b/test/Integer/a31.ll.out deleted file mode 100644 index 7407a746b5bf..000000000000 --- a/test/Integer/a31.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i31 0 ; <i31*> [#uses=0] -@c = constant i31 -2 ; <i31*> [#uses=0] -@d = constant i31 0 ; <i31*> [#uses=0] -@e = constant i31 -1 ; <i31*> [#uses=0] -@f = constant i31 1 ; <i31*> [#uses=0] -@g = constant i31 3 ; <i31*> [#uses=0] -@h = constant i31 undef ; <i31*> [#uses=0] -@i = constant i31 -1073741824 ; <i31*> [#uses=0] -@j = constant i31 1 ; <i31*> [#uses=0] -@l = constant i31 -1 ; <i31*> [#uses=0] -@n = constant i31 -2 ; <i31*> [#uses=0] -@q = constant i31 0 ; <i31*> [#uses=0] -@r = constant i31 2 ; <i31*> [#uses=0] -@s = constant i31 1 ; <i31*> [#uses=0] -@t = constant i31 0 ; <i31*> [#uses=0] -@o = constant i31 0 ; <i31*> [#uses=0] -@p = constant i31 -1 ; <i31*> [#uses=0] -@u = constant i31 -3 ; <i31*> [#uses=0] diff --git a/test/Integer/a33.ll b/test/Integer/a33.ll deleted file mode 100644 index f328907b4608..000000000000 --- a/test/Integer/a33.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 33 bits -; -@b = constant i33 add(i33 8589934591, i33 1) -@c = constant i33 add(i33 8589934591, i33 8589934591) -@d = constant i33 add(i33 8589934584, i33 8) -@e = constant i33 sub(i33 0 , i33 1) -@f = constant i33 sub(i33 0 , i33 8589934591) -@g = constant i33 sub(i33 2 , i33 8589934591) - -@h = constant i33 shl(i33 1 , i33 33) -@i = constant i33 shl(i33 1 , i33 32) -@j = constant i33 lshr(i33 8589934591 , i33 32) -@l = constant i33 ashr(i33 8589934591 , i33 32) - -@n = constant i33 mul(i33 8589934591, i33 2) -@q = constant i33 sdiv(i33 -1, i33 4294967295) -@r = constant i33 udiv(i33 -1, i33 4294967295) -@s = constant i33 srem(i33 1, i33 8589934590) -@t = constant i33 urem(i33 8589934591,i33 -1) -@o = constant i33 trunc( i34 8589934592 to i33 ) -@p = constant i33 trunc( i34 8589934591 to i33 ) -@u = constant i33 srem(i33 -1, i33 17) - diff --git a/test/Integer/a33.ll.out b/test/Integer/a33.ll.out deleted file mode 100644 index 6cd61ee69cdc..000000000000 --- a/test/Integer/a33.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i33 0 ; <i33*> [#uses=0] -@c = constant i33 -2 ; <i33*> [#uses=0] -@d = constant i33 0 ; <i33*> [#uses=0] -@e = constant i33 -1 ; <i33*> [#uses=0] -@f = constant i33 1 ; <i33*> [#uses=0] -@g = constant i33 3 ; <i33*> [#uses=0] -@h = constant i33 undef ; <i33*> [#uses=0] -@i = constant i33 -4294967296 ; <i33*> [#uses=0] -@j = constant i33 1 ; <i33*> [#uses=0] -@l = constant i33 -1 ; <i33*> [#uses=0] -@n = constant i33 -2 ; <i33*> [#uses=0] -@q = constant i33 0 ; <i33*> [#uses=0] -@r = constant i33 2 ; <i33*> [#uses=0] -@s = constant i33 1 ; <i33*> [#uses=0] -@t = constant i33 0 ; <i33*> [#uses=0] -@o = constant i33 0 ; <i33*> [#uses=0] -@p = constant i33 -1 ; <i33*> [#uses=0] -@u = constant i33 -1 ; <i33*> [#uses=0] diff --git a/test/Integer/a63.ll b/test/Integer/a63.ll deleted file mode 100644 index 052ecd585002..000000000000 --- a/test/Integer/a63.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 63 bits -; -@b = constant i63 add(i63 9223372036854775807, i63 1) -@c = constant i63 add(i63 9223372036854775807, i63 9223372036854775807) -@d = constant i63 add(i63 9223372036854775800, i63 8) -@e = constant i63 sub(i63 0 , i63 1) -@f = constant i63 sub(i63 0 , i63 9223372036854775807) -@g = constant i63 sub(i63 2 , i63 9223372036854775807) - -@h = constant i63 shl(i63 1 , i63 63) -@i = constant i63 shl(i63 1 , i63 62) -@j = constant i63 lshr(i63 9223372036854775807 , i63 62) -@l = constant i63 ashr(i63 9223372036854775807 , i63 62) - -@n = constant i63 mul(i63 9223372036854775807, i63 2) -@q = constant i63 sdiv(i63 -1, i63 4611686018427387903) -@u = constant i63 sdiv(i63 -1, i63 1) -@r = constant i63 udiv(i63 -1, i63 4611686018427387903) -@s = constant i63 srem(i63 3, i63 9223372036854775806) -@t = constant i63 urem(i63 9223372036854775807,i63 -1) -@o = constant i63 trunc( i64 9223372036854775808 to i63 ) -@p = constant i63 trunc( i64 9223372036854775807 to i63 ) diff --git a/test/Integer/a63.ll.out b/test/Integer/a63.ll.out deleted file mode 100644 index 18dff5a2964e..000000000000 --- a/test/Integer/a63.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i63 0 ; <i63*> [#uses=0] -@c = constant i63 -2 ; <i63*> [#uses=0] -@d = constant i63 0 ; <i63*> [#uses=0] -@e = constant i63 -1 ; <i63*> [#uses=0] -@f = constant i63 1 ; <i63*> [#uses=0] -@g = constant i63 3 ; <i63*> [#uses=0] -@h = constant i63 undef ; <i63*> [#uses=0] -@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0] -@j = constant i63 1 ; <i63*> [#uses=0] -@l = constant i63 -1 ; <i63*> [#uses=0] -@n = constant i63 -2 ; <i63*> [#uses=0] -@q = constant i63 0 ; <i63*> [#uses=0] -@u = constant i63 -1 ; <i63*> [#uses=0] -@r = constant i63 2 ; <i63*> [#uses=0] -@s = constant i63 1 ; <i63*> [#uses=0] -@t = constant i63 0 ; <i63*> [#uses=0] -@o = constant i63 0 ; <i63*> [#uses=0] -@p = constant i63 -1 ; <i63*> [#uses=0] diff --git a/test/Integer/a7.ll b/test/Integer/a7.ll deleted file mode 100644 index 1edb35f9104b..000000000000 --- a/test/Integer/a7.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 7 bits -; -@b = constant i7 add(i7 127, i7 1) -@q = constant i7 add(i7 -64, i7 -1) -@c = constant i7 add(i7 127, i7 127) -@d = constant i7 add(i7 120, i7 8) -@e = constant i7 sub(i7 0 , i7 1) -@f = constant i7 sub(i7 0 , i7 127) -@g = constant i7 sub(i7 2 , i7 127) -@r = constant i7 sub(i7 -3, i7 120) -@s = constant i7 sub(i7 -3, i7 -8) - -@h = constant i7 shl(i7 1 , i7 7) -@i = constant i7 shl(i7 1 , i7 6) -@j = constant i7 lshr(i7 127 , i7 6) -@l = constant i7 ashr(i7 127 , i7 6) -@m2= constant i7 ashr(i7 -1 , i7 3) - -@n = constant i7 mul(i7 127, i7 2) -@t = constant i7 mul(i7 -63, i7 -2) -@u = constant i7 mul(i7 -32, i7 2) -@v = constant i7 sdiv(i7 -1, i7 63) -@w = constant i7 udiv(i7 -1, i7 63) -@x = constant i7 srem(i7 1 , i7 126) -@y = constant i7 urem(i7 127, i7 -1) -@o = constant i7 trunc( i8 128 to i7 ) -@p = constant i7 trunc( i8 255 to i7 ) - diff --git a/test/Integer/a7.ll.out b/test/Integer/a7.ll.out deleted file mode 100644 index 250925d795e6..000000000000 --- a/test/Integer/a7.ll.out +++ /dev/null @@ -1,25 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i7 0 ; <i7*> [#uses=0] -@q = constant i7 63 ; <i7*> [#uses=0] -@c = constant i7 -2 ; <i7*> [#uses=0] -@d = constant i7 0 ; <i7*> [#uses=0] -@e = constant i7 -1 ; <i7*> [#uses=0] -@f = constant i7 1 ; <i7*> [#uses=0] -@g = constant i7 3 ; <i7*> [#uses=0] -@r = constant i7 5 ; <i7*> [#uses=0] -@s = constant i7 5 ; <i7*> [#uses=0] -@h = constant i7 undef ; <i7*> [#uses=0] -@i = constant i7 -64 ; <i7*> [#uses=0] -@j = constant i7 1 ; <i7*> [#uses=0] -@l = constant i7 -1 ; <i7*> [#uses=0] -@m2 = constant i7 -1 ; <i7*> [#uses=0] -@n = constant i7 -2 ; <i7*> [#uses=0] -@t = constant i7 -2 ; <i7*> [#uses=0] -@u = constant i7 -64 ; <i7*> [#uses=0] -@v = constant i7 0 ; <i7*> [#uses=0] -@w = constant i7 2 ; <i7*> [#uses=0] -@x = constant i7 1 ; <i7*> [#uses=0] -@y = constant i7 0 ; <i7*> [#uses=0] -@o = constant i7 0 ; <i7*> [#uses=0] -@p = constant i7 -1 ; <i7*> [#uses=0] diff --git a/test/Integer/a9.ll b/test/Integer/a9.ll deleted file mode 100644 index 711ec821c295..000000000000 --- a/test/Integer/a9.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 9 bits -; -@b = constant i9 add(i9 511, i9 1) -@c = constant i9 add(i9 511, i9 511) -@d = constant i9 add(i9 504, i9 8) -@e = constant i9 sub(i9 0 , i9 1) -@f = constant i9 sub(i9 0 , i9 511) -@g = constant i9 sub(i9 2 , i9 511) - -@h = constant i9 shl(i9 1 , i9 9) -@i = constant i9 shl(i9 1 , i9 8) -@j = constant i9 lshr(i9 511 , i9 8) -@l = constant i9 ashr(i9 511 , i9 8) - -@n = constant i9 mul(i9 511, i9 2) -@q = constant i9 sdiv(i9 511, i9 2) -@r = constant i9 udiv(i9 511, i9 2) -@s = constant i9 urem(i9 511, i9 -1) -@t = constant i9 srem(i9 1, i9 510) -@o = constant i9 trunc( i10 512 to i9 ) -@p = constant i9 trunc( i10 511 to i9 ) - diff --git a/test/Integer/a9.ll.out b/test/Integer/a9.ll.out deleted file mode 100644 index 6e38062c4a03..000000000000 --- a/test/Integer/a9.ll.out +++ /dev/null @@ -1,19 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i9 0 ; <i9*> [#uses=0] -@c = constant i9 -2 ; <i9*> [#uses=0] -@d = constant i9 0 ; <i9*> [#uses=0] -@e = constant i9 -1 ; <i9*> [#uses=0] -@f = constant i9 1 ; <i9*> [#uses=0] -@g = constant i9 3 ; <i9*> [#uses=0] -@h = constant i9 undef ; <i9*> [#uses=0] -@i = constant i9 -256 ; <i9*> [#uses=0] -@j = constant i9 1 ; <i9*> [#uses=0] -@l = constant i9 -1 ; <i9*> [#uses=0] -@n = constant i9 -2 ; <i9*> [#uses=0] -@q = constant i9 0 ; <i9*> [#uses=0] -@r = constant i9 255 ; <i9*> [#uses=0] -@s = constant i9 0 ; <i9*> [#uses=0] -@t = constant i9 1 ; <i9*> [#uses=0] -@o = constant i9 0 ; <i9*> [#uses=0] -@p = constant i9 -1 ; <i9*> [#uses=0] diff --git a/test/LLVMC/Alias.td b/test/LLVMC/Alias.td new file mode 100644 index 000000000000..5d37889304bd --- /dev/null +++ b/test/LLVMC/Alias.td @@ -0,0 +1,24 @@ +// Test alias generation. +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ + +(switch_option "dummy1", (help "none")), +// CHECK: cl::alias Alias_dummy2 +(alias_option "dummy2", "dummy1") +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td index 254d5eaf37af..c85f002e6e8b 100644 --- a/test/LLVMC/AppendCmdHook.td +++ b/test/LLVMC/AppendCmdHook.td @@ -1,7 +1,7 @@ // Check that hooks can be invoked from 'append_cmd'. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -26,4 +26,4 @@ def dummy_tool : Tool<[ (switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td index e5d5e9a64cdb..a52b8a8c1990 100644 --- a/test/LLVMC/EmptyCompilationGraph.td +++ b/test/LLVMC/EmptyCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph can be empty. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td index 86091db9bdfb..ce0cb824604c 100644 --- a/test/LLVMC/EnvParentheses.td +++ b/test/LLVMC/EnvParentheses.td @@ -2,7 +2,7 @@ // http://llvm.org/bugs/show_bug.cgi?id=4157 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: not grep {FOO")));} %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,6 +13,6 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; def Graph : CompilationGraph<[]>; diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td deleted file mode 100644 index d84ea847bf12..000000000000 --- a/test/LLVMC/ExternOptions.td +++ /dev/null @@ -1,26 +0,0 @@ -// Check that extern options work. -// The dummy tool and graph are required to silence warnings. -// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: vg_leak - -include "llvm/CompilerDriver/Common.td" - -// CHECK: extern cl::opt<bool> AutoGeneratedSwitch_Wall - -def OptList : OptionList<[(switch_option "Wall", (extern)), - (parameter_option "std", (extern)), - (prefix_list_option "L", (extern))]>; - -def dummy_tool : Tool<[ -(command "dummy_cmd"), -(in_language "dummy"), -(out_language "dummy"), -(actions (case - (switch_on "Wall"), (stop_compilation), - (not_empty "std"), (stop_compilation), - (not_empty "L"), (stop_compilation))) -]>; - -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td index 536b96a9758f..99b240e30fb3 100644 --- a/test/LLVMC/ForwardAs.td +++ b/test/LLVMC/ForwardAs.td @@ -2,12 +2,12 @@ // http://llvm.org/bugs/show_bug.cgi?id=4159 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "dummy", (extern))]>; +def OptList : OptionList<[(parameter_option "dummy", (help "dummmy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -18,4 +18,4 @@ def dummy_tool : Tool<[ (not_empty "dummy"), (forward_as "dummy", "unique_name"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td index 5e0bf290d1fd..9184ede36101 100644 --- a/test/LLVMC/ForwardTransformedValue.td +++ b/test/LLVMC/ForwardTransformedValue.td @@ -2,13 +2,13 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; // CHECK: std::string HookA // CHECK: std::string HookB @@ -18,10 +18,10 @@ def dummy_tool : Tool<[ (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: HookA(AutoGeneratedParameter_a + // CHECK: HookA(autogenerated::Parameter_a (not_empty "a"), (forward_transformed_value "a", "HookA"), - // CHECK: HookB(AutoGeneratedList_b + // CHECK: HookB(autogenerated::List_b (not_empty "b"), (forward_transformed_value "b", "HookB"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td index 4c7a0ee0ec5e..a42a3f06ec3d 100644 --- a/test/LLVMC/ForwardValue.td +++ b/test/LLVMC/ForwardValue.td @@ -2,23 +2,23 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: , AutoGeneratedParameter_a)); + // CHECK: , autogenerated::Parameter_a)); (not_empty "a"), (forward_value "a"), - // CHECK: B = AutoGeneratedList_b.begin() + // CHECK: B = autogenerated::List_b.begin() (not_empty "b"), (forward_value "b"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td index 5ff96cd6a88d..bbba2e984599 100644 --- a/test/LLVMC/HookWithArguments.td +++ b/test/LLVMC/HookWithArguments.td @@ -1,7 +1,7 @@ // Check that hooks with arguments work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -17,4 +17,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td index 9855dbc5bd9a..ed08b5321ccf 100644 --- a/test/LLVMC/HookWithInFile.td +++ b/test/LLVMC/HookWithInFile.td @@ -1,7 +1,7 @@ // Check that a hook can be given $INFILE as an argument. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td index 05209bf61aca..c3846797026e 100644 --- a/test/LLVMC/Init.td +++ b/test/LLVMC/Init.td @@ -1,7 +1,7 @@ // Check that (init true/false) and (init "str") work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "dummy2"), (forward "dummy2"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/LanguageMap.td b/test/LLVMC/LanguageMap.td new file mode 100644 index 000000000000..a0502142e6d7 --- /dev/null +++ b/test/LLVMC/LanguageMap.td @@ -0,0 +1,29 @@ +// Check that LanguageMap is processed properly. +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ +(switch_option "dummy1", (help "none")) +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def lang_map : LanguageMap<[ + // CHECK: langMap["dummy"] = "dummy_lang" + // CHECK: langMap["DUM"] = "dummy_lang" + (lang_to_suffixes "dummy_lang", ["dummy", "DUM"]), + // CHECK: langMap["DUM2"] = "dummy_lang_2" + (lang_to_suffixes "dummy_lang_2", "DUM2") +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td index 73ccb6311f3c..08c753380d47 100644 --- a/test/LLVMC/MultiValuedOption.td +++ b/test/LLVMC/MultiValuedOption.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -10,7 +10,7 @@ include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ // CHECK: cl::multi_val(2) (prefix_list_option "foo", (multi_val 2)), - (parameter_list_option "baz", (multi_val 2), (extern))]>; + (parameter_list_option "baz", (multi_val 2))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td index 86cd6131243a..b3746c03b6cb 100644 --- a/test/LLVMC/MultipleCompilationGraphs.td +++ b/test/LLVMC/MultipleCompilationGraphs.td @@ -1,6 +1,6 @@ // Check that multiple compilation graphs are allowed. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/MultiplePluginPriorities.td b/test/LLVMC/MultiplePluginPriorities.td deleted file mode 100644 index 2fe06450eecb..000000000000 --- a/test/LLVMC/MultiplePluginPriorities.td +++ /dev/null @@ -1,17 +0,0 @@ -// Check that multiple plugin priorities are not allowed. -// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "More than one 'PluginPriority' instance found" - -// Disable for Darwin PPC: <rdar://problem/7598390> -// XFAIL: powerpc-apple-darwin - -// Generally XFAIL'ed for now, this is (sometimes?) failing on x86_64-apple-darwin10. -// RUN: false -// XFAIL: * - -include "llvm/CompilerDriver/Common.td" - -def Graph : CompilationGraph<[]>; - -def Priority1 : PluginPriority<1>; - -def Priority2 : PluginPriority<2>; diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td index a80bcfe6ce1c..34b444066350 100644 --- a/test/LLVMC/NoActions.td +++ b/test/LLVMC/NoActions.td @@ -1,7 +1,7 @@ // Check that tools without associated actions are accepted. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td index 69df70133307..4182882c451f 100644 --- a/test/LLVMC/NoCompilationGraph.td +++ b/test/LLVMC/NoCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph is not required. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td index 37fbc87fdfab..54fa62d1ff04 100644 --- a/test/LLVMC/OneOrMore.td +++ b/test/LLVMC/OneOrMore.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td index c2641be7e645..8019c42634f3 100644 --- a/test/LLVMC/OptionPreprocessor.td +++ b/test/LLVMC/OptionPreprocessor.td @@ -1,7 +1,7 @@ // Test for the OptionPreprocessor and related functionality. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -63,5 +63,5 @@ def dummy : Tool< (not_empty "foo_l"), (error))) ]>; -def Graph : CompilationGraph<[Edge<"root", "dummy">]>; +def Graph : CompilationGraph<[(edge "root", "dummy")]>; diff --git a/test/LLVMC/OutputSuffixHook.td b/test/LLVMC/OutputSuffixHook.td index 4ecad2360ba0..1f5ecd1237f3 100644 --- a/test/LLVMC/OutputSuffixHook.td +++ b/test/LLVMC/OutputSuffixHook.td @@ -1,8 +1,8 @@ // Check that hooks can be invoked from 'output_suffix'. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: * +// RUN: %compile_cxx %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,7 +13,7 @@ def OptList : OptionList<[ ]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd $INFILE"), +(command "dummy_cmd"), (in_language "dummy_lang"), (out_language "dummy_lang"), (actions (case @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (switch_on "dummy1"), (output_suffix "$CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td index 0388cb0b0f1e..b0f57e97e0d8 100644 --- a/test/LLVMC/TestWarnings.td +++ b/test/LLVMC/TestWarnings.td @@ -5,4 +5,4 @@ include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(switch_option "Wall", (extern))]>; +def OptList : OptionList<[(switch_option "Wall", (help "dummy"))]>; diff --git a/test/Linker/metadata-a.ll b/test/Linker/metadata-a.ll new file mode 100644 index 000000000000..5a9d2e40b948 --- /dev/null +++ b/test/Linker/metadata-a.ll @@ -0,0 +1,15 @@ +; RUN: llvm-link %s %p/metadata-b.ll -S -o - | FileCheck %s + +; CHECK: define void @foo(i32 %a) +; CHECK: ret void, !attach !0, !also !{i32 %a} +; CHECK: define void @goo(i32 %b) +; CHECK: ret void, !attach !1, !and !{i32 %b} +; CHECK: !0 = metadata !{i32 524334, void (i32)* @foo} +; CHECK: !1 = metadata !{i32 524334, void (i32)* @goo} + +define void @foo(i32 %a) nounwind { +entry: + ret void, !attach !0, !also !{ i32 %a } +} + +!0 = metadata !{i32 524334, void (i32)* @foo} diff --git a/test/Linker/metadata-b.ll b/test/Linker/metadata-b.ll new file mode 100644 index 000000000000..ef0270af0756 --- /dev/null +++ b/test/Linker/metadata-b.ll @@ -0,0 +1,9 @@ +; This file is for use with metadata-a.ll +; RUN: true + +define void @goo(i32 %b) nounwind { +entry: + ret void, !attach !0, !and !{ i32 %b } +} + +!0 = metadata !{i32 524334, void (i32)* @goo} diff --git a/test/MC/AsmParser/ARM/arm_instructions.s b/test/MC/AsmParser/ARM/arm_instructions.s new file mode 100644 index 000000000000..8632cb0cefd8 --- /dev/null +++ b/test/MC/AsmParser/ARM/arm_instructions.s @@ -0,0 +1,8 @@ +@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s + +@ CHECK: nop + nop + +@ CHECK: nopeq + nopeq + diff --git a/test/MC/AsmParser/ELF/dg.exp b/test/MC/AsmParser/ELF/dg.exp new file mode 100644 index 000000000000..ca6aefe9c53d --- /dev/null +++ b/test/MC/AsmParser/ELF/dg.exp @@ -0,0 +1,6 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} + diff --git a/test/MC/AsmParser/ELF/directive_previous.s b/test/MC/AsmParser/ELF/directive_previous.s new file mode 100644 index 000000000000..5db1eac03d39 --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_previous.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +.bss +# CHECK: .bss + +.text +# CHECK: .text + +.previous +# CHECK: .bss + +.previous +# CHECK: .text diff --git a/test/MC/AsmParser/ELF/directive_section.s b/test/MC/AsmParser/ELF/directive_section.s new file mode 100644 index 000000000000..9531c026e674 --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_section.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + + .bss +# CHECK: .bss + + .data.rel.ro +# CHECK: .data.rel.ro + + .data.rel +# CHECK: .data.rel + + .eh_frame +# CHECK: .eh_frame + + .rodata +# CHECK: .rodata + + .tbss +# CHECK: .tbss + + .tdata +# CHECK: .tdata + diff --git a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s new file mode 100644 index 000000000000..47bf980894d0 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulhqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulhqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $1, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01] + vpclmulhqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $1, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01] + vpclmulhqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $16, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10] + vpclmullqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $16, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10] + vpclmullqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $0, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00] + vpclmullqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $0, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00] + vpclmullqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulqdq $17, %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulqdq $17, (%eax), %xmm5, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_32-avx-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-encoding.s new file mode 100644 index 000000000000..b7ade6670a01 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-encoding.s @@ -0,0 +1,3241 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x58,0xd4] + vaddss %xmm4, %xmm6, %xmm2 + +// CHECK: vmulss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x59,0xd4] + vmulss %xmm4, %xmm6, %xmm2 + +// CHECK: vsubss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x5c,0xd4] + vsubss %xmm4, %xmm6, %xmm2 + +// CHECK: vdivss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] + vdivss %xmm4, %xmm6, %xmm2 + +// CHECK: vaddsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] + vaddsd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] + vmulsd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] + vsubsd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] + vdivsd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] + vaddps %xmm4, %xmm6, %xmm2 + +// CHECK: vsubps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] + vsubps %xmm4, %xmm6, %xmm2 + +// CHECK: vmulps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] + vmulps %xmm4, %xmm6, %xmm2 + +// CHECK: vdivps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] + vdivps %xmm4, %xmm6, %xmm2 + +// CHECK: vaddpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] + vaddpd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] + vsubpd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] + vmulpd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] + vdivpd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: vmaxss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] + vmaxss %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] + vmaxsd %xmm2, %xmm4, %xmm6 + +// CHECK: vminss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] + vminss %xmm2, %xmm4, %xmm6 + +// CHECK: vminsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] + vminsd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] + vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] + vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] + vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] + vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] + vmaxps %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] + vmaxpd %xmm2, %xmm4, %xmm6 + +// CHECK: vminps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] + vminps %xmm2, %xmm4, %xmm6 + +// CHECK: vminpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] + vminpd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] + vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] + vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] + vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] + vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] + vandps %xmm2, %xmm4, %xmm6 + +// CHECK: vandpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] + vandpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] + vorps %xmm2, %xmm4, %xmm6 + +// CHECK: vorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] + vorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] + vxorps %xmm2, %xmm4, %xmm6 + +// CHECK: vxorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] + vxorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] + vandnps %xmm2, %xmm4, %xmm6 + +// CHECK: vandnpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] + vandnpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] + vmovss -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovss %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x10,0xec] + vmovss %xmm4, %xmm2, %xmm5 + +// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] + vmovsd -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovsd %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x10,0xec] + vmovsd %xmm4, %xmm2, %xmm5 + +// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] + vunpckhps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] + vunpckhpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] + vunpcklps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] + vunpcklpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00] + vcmpps $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] + vcmpps $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] + vcmpps $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] + vcmppd $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] + vcmppd $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] + vcmppd $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] + vshufps $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] + vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] + vshufpd $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] + vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] + vcmpeqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] + vcmpleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] + vcmpltps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] + vcmpneqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] + vcmpnleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] + vcmpnltps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] + vcmpordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] + vcmpunordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] + vcmpeqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] + vcmplepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] + vcmpltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] + vcmpneqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] + vcmpnlepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] + vcmpnltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] + vcmpordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] + vcmpunordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] + vcmpeqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] + vcmpless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] + vcmpltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] + vcmpneqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] + vcmpnless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] + vcmpnltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] + vcmpordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] + vcmpunordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] + vcmpeqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] + vcmplesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] + vcmpltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] + vcmpneqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] + vcmpnlesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] + vcmpnltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] + vcmpordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] + vcmpunordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vucomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] + vucomiss %xmm1, %xmm2 + +// CHECK: vucomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] + vucomiss (%eax), %xmm2 + +// CHECK: vcomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] + vcomiss %xmm1, %xmm2 + +// CHECK: vcomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] + vcomiss (%eax), %xmm2 + +// CHECK: vucomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] + vucomisd %xmm1, %xmm2 + +// CHECK: vucomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] + vucomisd (%eax), %xmm2 + +// CHECK: vcomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] + vcomisd %xmm1, %xmm2 + +// CHECK: vcomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] + vcomisd (%eax), %xmm2 + +// CHECK: vcvttss2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] + vcvttss2si %xmm1, %eax + +// CHECK: vcvttss2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%ecx), %eax + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvttsd2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] + vcvttsd2si %xmm1, %eax + +// CHECK: vcvttsd2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%ecx), %eax + +// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: vmovaps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0x10] + vmovaps (%eax), %xmm2 + +// CHECK: vmovaps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] + vmovaps %xmm1, %xmm2 + +// CHECK: vmovaps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x29,0x08] + vmovaps %xmm1, (%eax) + +// CHECK: vmovapd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0x10] + vmovapd (%eax), %xmm2 + +// CHECK: vmovapd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] + vmovapd %xmm1, %xmm2 + +// CHECK: vmovapd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x29,0x08] + vmovapd %xmm1, (%eax) + +// CHECK: vmovups (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0x10] + vmovups (%eax), %xmm2 + +// CHECK: vmovups %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] + vmovups %xmm1, %xmm2 + +// CHECK: vmovups %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x11,0x08] + vmovups %xmm1, (%eax) + +// CHECK: vmovupd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0x10] + vmovupd (%eax), %xmm2 + +// CHECK: vmovupd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0xd1] + vmovupd %xmm1, %xmm2 + +// CHECK: vmovupd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x11,0x08] + vmovupd %xmm1, (%eax) + +// CHECK: vmovlps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x13,0x08] + vmovlps %xmm1, (%eax) + +// CHECK: vmovlps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0x18] + vmovlps (%eax), %xmm2, %xmm3 + +// CHECK: vmovlpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x13,0x08] + vmovlpd %xmm1, (%eax) + +// CHECK: vmovlpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x12,0x18] + vmovlpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovhps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x17,0x08] + vmovhps %xmm1, (%eax) + +// CHECK: vmovhps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0x18] + vmovhps (%eax), %xmm2, %xmm3 + +// CHECK: vmovhpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x17,0x08] + vmovhpd %xmm1, (%eax) + +// CHECK: vmovhpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x16,0x18] + vmovhpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] + vmovlhps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] + vmovhlps %xmm1, %xmm2, %xmm3 + +// CHECK: vcvtss2sil %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] + vcvtss2si %xmm1, %eax + +// CHECK: vcvtss2sil (%eax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%eax), %ebx + +// CHECK: vcvtdq2ps %xmm5, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] + vcvtdq2ps %xmm5, %xmm6 + +// CHECK: vcvtdq2ps (%eax), %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] + vcvtdq2ps (%eax), %xmm6 + +// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] + vcvtsd2ss %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] + vcvtsd2ss (%eax), %xmm4, %xmm6 + +// CHECK: vcvtps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0xda] + vcvtps2dq %xmm2, %xmm3 + +// CHECK: vcvtps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] + vcvtps2dq (%eax), %xmm3 + +// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] + vcvtss2sd %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0x30] + vcvtss2sd (%eax), %xmm4, %xmm6 + +// CHECK: vcvtdq2ps %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] + vcvtdq2ps %xmm4, %xmm6 + +// CHECK: vcvtdq2ps (%ecx), %xmm4 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] + vcvtdq2ps (%ecx), %xmm4 + +// CHECK: vcvttps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] + vcvttps2dq %xmm2, %xmm3 + +// CHECK: vcvttps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] + vcvttps2dq (%eax), %xmm3 + +// CHECK: vcvtps2pd %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf8,0x5a,0xda] + vcvtps2pd %xmm2, %xmm3 + +// CHECK: vcvtps2pd (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] + vcvtps2pd (%eax), %xmm3 + +// CHECK: vcvtpd2ps %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] + vcvtpd2ps %xmm2, %xmm3 + +// CHECK: vsqrtpd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] + vsqrtpd %xmm1, %xmm2 + +// CHECK: vsqrtpd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0x10] + vsqrtpd (%eax), %xmm2 + +// CHECK: vsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] + vsqrtps %xmm1, %xmm2 + +// CHECK: vsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0x10] + vsqrtps (%eax), %xmm2 + +// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] + vsqrtsd %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0x18] + vsqrtsd (%eax), %xmm2, %xmm3 + +// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0xd9] + vsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0x18] + vsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] + vrsqrtps %xmm1, %xmm2 + +// CHECK: vrsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0x10] + vrsqrtps (%eax), %xmm2 + +// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0xd9] + vrsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0x18] + vrsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrcpps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] + vrcpps %xmm1, %xmm2 + +// CHECK: vrcpps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0x10] + vrcpps (%eax), %xmm2 + +// CHECK: vrcpss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0xd9] + vrcpss %xmm1, %xmm2, %xmm3 + +// CHECK: vrcpss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0x18] + vrcpss (%eax), %xmm2, %xmm3 + +// CHECK: vmovntdq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] + vmovntdq %xmm1, (%eax) + +// CHECK: vmovntpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] + vmovntpd %xmm1, (%eax) + +// CHECK: vmovntps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] + vmovntps %xmm1, (%eax) + +// CHECK: vldmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x10] + vldmxcsr (%eax) + +// CHECK: vstmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x18] + vstmxcsr (%eax) + +// CHECK: vldmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] + vldmxcsr 0xdeadbeef + +// CHECK: vstmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] + vstmxcsr 0xdeadbeef + +// CHECK: vpsubb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] + vpsubb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] + vpsubb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] + vpsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0x18] + vpsubw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] + vpsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] + vpsubd (%eax), %xmm2, %xmm3 + +// CHECK: vpsubq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] + vpsubq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] + vpsubq (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] + vpsubsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] + vpsubsb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] + vpsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] + vpsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] + vpsubusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] + vpsubusb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] + vpsubusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] + vpsubusw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] + vpaddb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] + vpaddb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] + vpaddw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] + vpaddw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] + vpaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] + vpaddd (%eax), %xmm2, %xmm3 + +// CHECK: vpaddq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] + vpaddq %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] + vpaddq (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] + vpaddsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0x18] + vpaddsb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] + vpaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0x18] + vpaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9] + vpaddusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] + vpaddusb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] + vpaddusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] + vpaddusw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] + vpmulhuw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] + vpmulhuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] + vpmulhw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] + vpmulhw (%eax), %xmm2, %xmm3 + +// CHECK: vpmullw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] + vpmullw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmullw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] + vpmullw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuludq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] + vpmuludq %xmm1, %xmm2, %xmm3 + +// CHECK: vpmuludq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] + vpmuludq (%eax), %xmm2, %xmm3 + +// CHECK: vpavgb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] + vpavgb %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] + vpavgb (%eax), %xmm2, %xmm3 + +// CHECK: vpavgw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] + vpavgw %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] + vpavgw (%eax), %xmm2, %xmm3 + +// CHECK: vpminsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] + vpminsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpminsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0x18] + vpminsw (%eax), %xmm2, %xmm3 + +// CHECK: vpminub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] + vpminub %xmm1, %xmm2, %xmm3 + +// CHECK: vpminub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0x18] + vpminub (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] + vpmaxsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0x18] + vpmaxsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] + vpmaxub %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0x18] + vpmaxub (%eax), %xmm2, %xmm3 + +// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] + vpsadbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsadbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] + vpsadbw (%eax), %xmm2, %xmm3 + +// CHECK: vpsllw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] + vpsllw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] + vpsllw (%eax), %xmm2, %xmm3 + +// CHECK: vpslld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] + vpslld %xmm1, %xmm2, %xmm3 + +// CHECK: vpslld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] + vpslld (%eax), %xmm2, %xmm3 + +// CHECK: vpsllq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] + vpsllq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] + vpsllq (%eax), %xmm2, %xmm3 + +// CHECK: vpsraw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] + vpsraw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsraw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] + vpsraw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrad %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] + vpsrad %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrad (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] + vpsrad (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] + vpsrlw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] + vpsrlw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] + vpsrld %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] + vpsrld (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] + vpsrlq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] + vpsrlq (%eax), %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpslldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] + vpslldq $10, %xmm2, %xmm3 + +// CHECK: vpsllq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] + vpsllq $10, %xmm2, %xmm3 + +// CHECK: vpsllw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] + vpsllw $10, %xmm2, %xmm3 + +// CHECK: vpsrad $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] + vpsrad $10, %xmm2, %xmm3 + +// CHECK: vpsraw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] + vpsraw $10, %xmm2, %xmm3 + +// CHECK: vpsrld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] + vpsrld $10, %xmm2, %xmm3 + +// CHECK: vpsrldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] + vpsrldq $10, %xmm2, %xmm3 + +// CHECK: vpsrlq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] + vpsrlq $10, %xmm2, %xmm3 + +// CHECK: vpsrlw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] + vpsrlw $10, %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpand %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] + vpand %xmm1, %xmm2, %xmm3 + +// CHECK: vpand (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] + vpand (%eax), %xmm2, %xmm3 + +// CHECK: vpor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] + vpor %xmm1, %xmm2, %xmm3 + +// CHECK: vpor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] + vpor (%eax), %xmm2, %xmm3 + +// CHECK: vpxor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] + vpxor %xmm1, %xmm2, %xmm3 + +// CHECK: vpxor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0x18] + vpxor (%eax), %xmm2, %xmm3 + +// CHECK: vpandn %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] + vpandn %xmm1, %xmm2, %xmm3 + +// CHECK: vpandn (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] + vpandn (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] + vpcmpeqb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0x18] + vpcmpeqb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] + vpcmpeqw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0x18] + vpcmpeqw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] + vpcmpeqd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0x18] + vpcmpeqd (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] + vpcmpgtb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0x18] + vpcmpgtb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] + vpcmpgtw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x65,0x18] + vpcmpgtw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] + vpcmpgtd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0x18] + vpcmpgtd (%eax), %xmm2, %xmm3 + +// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] + vpacksswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpacksswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0x18] + vpacksswb (%eax), %xmm2, %xmm3 + +// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] + vpackssdw %xmm1, %xmm2, %xmm3 + +// CHECK: vpackssdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] + vpackssdw (%eax), %xmm2, %xmm3 + +// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] + vpackuswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpackuswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0x18] + vpackuswb (%eax), %xmm2, %xmm3 + +// CHECK: vpshufd $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] + vpshufd $4, %xmm2, %xmm3 + +// CHECK: vpshufd $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] + vpshufd $4, (%eax), %xmm3 + +// CHECK: vpshufhw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] + vpshufhw $4, %xmm2, %xmm3 + +// CHECK: vpshufhw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] + vpshufhw $4, (%eax), %xmm3 + +// CHECK: vpshuflw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] + vpshuflw $4, %xmm2, %xmm3 + +// CHECK: vpshuflw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] + vpshuflw $4, (%eax), %xmm3 + +// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] + vpunpcklbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0x18] + vpunpcklbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] + vpunpcklwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0x18] + vpunpcklwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] + vpunpckldq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0x18] + vpunpckldq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] + vpunpcklqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] + vpunpcklqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] + vpunpckhbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0x18] + vpunpckhbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] + vpunpckhwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0x18] + vpunpckhwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] + vpunpckhdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] + vpunpckhdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] + vpunpckhqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] + vpunpckhqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm3 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm3 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + vpextrw $7, %xmm2, %eax + +// CHECK: vpmovmskb %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] + vpmovmskb %xmm1, %eax + +// CHECK: vmaskmovdqu %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] + vmaskmovdqu %xmm1, %xmm2 + +// CHECK: vmovd %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] + vmovd %xmm1, %eax + +// CHECK: vmovd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] + vmovd %xmm1, (%eax) + +// CHECK: vmovd %eax, %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] + vmovd %eax, %xmm1 + +// CHECK: vmovd (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] + vmovd (%eax), %xmm1 + +// CHECK: vmovq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] + vmovq %xmm1, (%eax) + +// CHECK: vmovq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] + vmovq %xmm1, %xmm2 + +// CHECK: vmovq (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] + vmovq (%eax), %xmm1 + +// CHECK: vcvtpd2dq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] + vcvtpd2dq %xmm1, %xmm2 + +// CHECK: vcvtdq2pd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] + vcvtdq2pd %xmm1, %xmm2 + +// CHECK: vcvtdq2pd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] + vcvtdq2pd (%eax), %xmm2 + +// CHECK: vmovshdup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] + vmovshdup %xmm1, %xmm2 + +// CHECK: vmovshdup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0x10] + vmovshdup (%eax), %xmm2 + +// CHECK: vmovsldup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] + vmovsldup %xmm1, %xmm2 + +// CHECK: vmovsldup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0x10] + vmovsldup (%eax), %xmm2 + +// CHECK: vmovddup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] + vmovddup %xmm1, %xmm2 + +// CHECK: vmovddup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0x10] + vmovddup (%eax), %xmm2 + +// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] + vaddsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubps (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] + vaddsubps (%eax), %xmm1, %xmm2 + +// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] + vaddsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] + vaddsubpd (%eax), %xmm1, %xmm2 + +// CHECK: vhaddps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] + vhaddps %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] + vhaddps (%eax), %xmm2, %xmm3 + +// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] + vhaddpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] + vhaddpd (%eax), %xmm2, %xmm3 + +// CHECK: vhsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] + vhsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] + vhsubps (%eax), %xmm2, %xmm3 + +// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] + vhsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] + vhsubpd (%eax), %xmm2, %xmm3 + +// CHECK: vpabsb %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] + vpabsb %xmm1, %xmm2 + +// CHECK: vpabsb (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] + vpabsb (%eax), %xmm2 + +// CHECK: vpabsw %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] + vpabsw %xmm1, %xmm2 + +// CHECK: vpabsw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] + vpabsw (%eax), %xmm2 + +// CHECK: vpabsd %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] + vpabsd %xmm1, %xmm2 + +// CHECK: vpabsd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] + vpabsd (%eax), %xmm2 + +// CHECK: vphaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] + vphaddw %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] + vphaddw (%eax), %xmm2, %xmm3 + +// CHECK: vphaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] + vphaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] + vphaddd (%eax), %xmm2, %xmm3 + +// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] + vphaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] + vphaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] + vphsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] + vphsubw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] + vphsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] + vphsubd (%eax), %xmm2, %xmm3 + +// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9] + vphsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] + vphsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] + vpmaddubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] + vpmaddubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpshufb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] + vpshufb %xmm1, %xmm2, %xmm3 + +// CHECK: vpshufb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] + vpshufb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] + vpsignb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18] + vpsignb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] + vpsignw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] + vpsignw (%eax), %xmm2, %xmm3 + +// CHECK: vpsignd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] + vpsignd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] + vpsignd (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] + vpmulhrsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] + vpmulhrsw (%eax), %xmm2, %xmm3 + +// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] + vpalignr $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] + vpalignr $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07] + vroundsd $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] + vroundsd $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] + vroundss $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] + vroundss $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundpd $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] + vroundpd $7, %xmm2, %xmm3 + +// CHECK: vroundpd $7, (%eax), %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] + vroundpd $7, (%eax), %xmm3 + +// CHECK: vroundps $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] + vroundps $7, %xmm2, %xmm3 + +// CHECK: vroundps $7, (%eax), %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] + vroundps $7, (%eax), %xmm3 + +// CHECK: vphminposuw %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] + vphminposuw %xmm2, %xmm3 + +// CHECK: vphminposuw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] + vphminposuw (%eax), %xmm2 + +// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] + vpackusdw %xmm2, %xmm3, %xmm1 + +// CHECK: vpackusdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] + vpackusdw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] + vpcmpeqq %xmm2, %xmm3, %xmm1 + +// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] + vpcmpeqq (%eax), %xmm2, %xmm3 + +// CHECK: vpminsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] + vpminsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] + vpminsb (%eax), %xmm2, %xmm3 + +// CHECK: vpminsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca] + vpminsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] + vpminsd (%eax), %xmm2, %xmm3 + +// CHECK: vpminud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] + vpminud %xmm2, %xmm3, %xmm1 + +// CHECK: vpminud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] + vpminud (%eax), %xmm2, %xmm3 + +// CHECK: vpminuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] + vpminuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpminuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] + vpminuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] + vpmaxsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] + vpmaxsb (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] + vpmaxsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] + vpmaxsd (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] + vpmaxud %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] + vpmaxud (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] + vpmaxuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] + vpmaxuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] + vpmuldq %xmm2, %xmm3, %xmm1 + +// CHECK: vpmuldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] + vpmuldq (%eax), %xmm2, %xmm3 + +// CHECK: vpmulld %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] + vpmulld %xmm2, %xmm5, %xmm1 + +// CHECK: vpmulld (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] + vpmulld (%eax), %xmm5, %xmm3 + +// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] + vblendps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] + vblendps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] + vblendpd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] + vblendpd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] + vpblendw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] + vpblendw $3, (%eax), %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] + vmpsadbw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] + vmpsadbw $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] + vdpps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] + vdpps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] + vdppd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] + vdppd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] + vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] + vblendvpd %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] + vblendvps %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] + vblendvps %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] + vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] + vpblendvb %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpmovsxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] + vpmovsxbw %xmm2, %xmm5 + +// CHECK: vpmovsxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] + vpmovsxbw (%eax), %xmm2 + +// CHECK: vpmovsxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] + vpmovsxwd %xmm2, %xmm5 + +// CHECK: vpmovsxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] + vpmovsxwd (%eax), %xmm2 + +// CHECK: vpmovsxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] + vpmovsxdq %xmm2, %xmm5 + +// CHECK: vpmovsxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] + vpmovsxdq (%eax), %xmm2 + +// CHECK: vpmovzxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] + vpmovzxbw %xmm2, %xmm5 + +// CHECK: vpmovzxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] + vpmovzxbw (%eax), %xmm2 + +// CHECK: vpmovzxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] + vpmovzxwd %xmm2, %xmm5 + +// CHECK: vpmovzxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] + vpmovzxwd (%eax), %xmm2 + +// CHECK: vpmovzxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] + vpmovzxdq %xmm2, %xmm5 + +// CHECK: vpmovzxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] + vpmovzxdq (%eax), %xmm2 + +// CHECK: vpmovsxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] + vpmovsxbq %xmm2, %xmm5 + +// CHECK: vpmovsxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] + vpmovsxbq (%eax), %xmm2 + +// CHECK: vpmovzxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] + vpmovzxbq %xmm2, %xmm5 + +// CHECK: vpmovzxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] + vpmovzxbq (%eax), %xmm2 + +// CHECK: vpmovsxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] + vpmovsxbd %xmm2, %xmm5 + +// CHECK: vpmovsxbd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] + vpmovsxbd (%eax), %xmm2 + +// CHECK: vpmovsxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] + vpmovsxwq %xmm2, %xmm5 + +// CHECK: vpmovsxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] + vpmovsxwq (%eax), %xmm2 + +// CHECK: vpmovzxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] + vpmovzxbd %xmm2, %xmm5 + +// CHECK: vpmovzxbd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] + vpmovzxbd (%eax), %xmm2 + +// CHECK: vpmovzxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] + vpmovzxwq %xmm2, %xmm5 + +// CHECK: vpmovzxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] + vpmovzxwq (%eax), %xmm2 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + vpextrw $7, %xmm2, %eax + +// CHECK: vpextrw $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] + vpextrw $7, %xmm2, (%eax) + +// CHECK: vpextrd $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] + vpextrd $7, %xmm2, %eax + +// CHECK: vpextrd $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] + vpextrd $7, %xmm2, (%eax) + +// CHECK: vpextrb $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] + vpextrb $7, %xmm2, %eax + +// CHECK: vpextrb $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] + vpextrb $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] + vextractps $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] + vextractps $7, %xmm2, %eax + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] + vpinsrb $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] + vpinsrb $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] + vpinsrd $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] + vpinsrd $7, (%eax), %xmm2, %xmm5 + +// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] + vinsertps $7, %xmm2, %xmm5, %xmm1 + +// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] + vinsertps $7, (%eax), %xmm5, %xmm1 + +// CHECK: vptest %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] + vptest %xmm2, %xmm5 + +// CHECK: vptest (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] + vptest (%eax), %xmm2 + +// CHECK: vmovntdqa (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] + vmovntdqa (%eax), %xmm2 + +// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] + vpcmpgtq %xmm2, %xmm5, %xmm1 + +// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] + vpcmpgtq (%eax), %xmm5, %xmm3 + +// CHECK: vpcmpistrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] + vpcmpistrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpistrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] + vpcmpistrm $7, (%eax), %xmm5 + +// CHECK: vpcmpestrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] + vpcmpestrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpestrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] + vpcmpestrm $7, (%eax), %xmm5 + +// CHECK: vpcmpistri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] + vpcmpistri $7, %xmm2, %xmm5 + +// CHECK: vpcmpistri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] + vpcmpistri $7, (%eax), %xmm5 + +// CHECK: vpcmpestri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] + vpcmpestri $7, %xmm2, %xmm5 + +// CHECK: vpcmpestri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] + vpcmpestri $7, (%eax), %xmm5 + +// CHECK: vaesimc %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] + vaesimc %xmm2, %xmm5 + +// CHECK: vaesimc (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] + vaesimc (%eax), %xmm2 + +// CHECK: vaesenc %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] + vaesenc %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenc (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] + vaesenc (%eax), %xmm5, %xmm3 + +// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] + vaesenclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] + vaesenclast (%eax), %xmm5, %xmm3 + +// CHECK: vaesdec %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] + vaesdec %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdec (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] + vaesdec (%eax), %xmm5, %xmm3 + +// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] + vaesdeclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] + vaesdeclast (%eax), %xmm5, %xmm3 + +// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] + vaeskeygenassist $7, %xmm2, %xmm5 + +// CHECK: vaeskeygenassist $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] + vaeskeygenassist $7, (%eax), %xmm5 + +// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] + vcmpeq_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] + vcmpngeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] + vcmpngtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] + vcmpfalseps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] + vcmpneq_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] + vcmpgeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] + vcmpgtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] + vcmptrueps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] + vcmpeq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] + vcmplt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] + vcmple_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] + vcmpunord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] + vcmpneq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15] + vcmpnlt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] + vcmpnle_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] + vcmpord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] + vcmpeq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] + vcmpnge_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] + vcmpngt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] + vcmpfalse_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c] + vcmpneq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] + vcmpge_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] + vcmpgt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] + vcmptrue_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovaps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0x10] + vmovaps (%eax), %ymm2 + +// CHECK: vmovaps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] + vmovaps %ymm1, %ymm2 + +// CHECK: vmovaps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x29,0x08] + vmovaps %ymm1, (%eax) + +// CHECK: vmovapd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0x10] + vmovapd (%eax), %ymm2 + +// CHECK: vmovapd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] + vmovapd %ymm1, %ymm2 + +// CHECK: vmovapd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x29,0x08] + vmovapd %ymm1, (%eax) + +// CHECK: vmovups (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0x10] + vmovups (%eax), %ymm2 + +// CHECK: vmovups %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] + vmovups %ymm1, %ymm2 + +// CHECK: vmovups %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x11,0x08] + vmovups %ymm1, (%eax) + +// CHECK: vmovupd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0x10] + vmovupd (%eax), %ymm2 + +// CHECK: vmovupd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] + vmovupd %ymm1, %ymm2 + +// CHECK: vmovupd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x11,0x08] + vmovupd %ymm1, (%eax) + +// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x15,0xe1] + vunpckhps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x15,0xe1] + vunpckhpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x14,0xe1] + vunpcklps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x14,0xe1] + vunpcklpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vmovntdq %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] + vmovntdq %ymm1, (%eax) + +// CHECK: vmovntpd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] + vmovntpd %ymm1, (%eax) + +// CHECK: vmovntps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] + vmovntps %ymm1, (%eax) + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vmaxps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] + vmaxps %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] + vmaxpd %ymm2, %ymm4, %ymm6 + +// CHECK: vminps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] + vminps %ymm2, %ymm4, %ymm6 + +// CHECK: vminpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] + vminpd %ymm2, %ymm4, %ymm6 + +// CHECK: vsubps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] + vsubps %ymm2, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] + vsubpd %ymm2, %ymm4, %ymm6 + +// CHECK: vdivps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2] + vdivps %ymm2, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] + vdivpd %ymm2, %ymm4, %ymm6 + +// CHECK: vaddps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] + vaddps %ymm2, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] + vaddpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmulps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] + vmulps %ymm2, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] + vmulpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%eax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%eax), %ymm4, %ymm6 + +// CHECK: vminps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%eax), %ymm4, %ymm6 + +// CHECK: vminpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%eax), %ymm4, %ymm6 + +// CHECK: vsubps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%eax), %ymm4, %ymm6 + +// CHECK: vsubpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%eax), %ymm4, %ymm6 + +// CHECK: vdivps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%eax), %ymm4, %ymm6 + +// CHECK: vdivpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] + vdivpd (%eax), %ymm4, %ymm6 + +// CHECK: vaddps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%eax), %ymm4, %ymm6 + +// CHECK: vaddpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%eax), %ymm4, %ymm6 + +// CHECK: vmulps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%eax), %ymm4, %ymm6 + +// CHECK: vmulpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%eax), %ymm4, %ymm6 + +// CHECK: vsqrtpd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] + vsqrtpd %ymm1, %ymm2 + +// CHECK: vsqrtpd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0x10] + vsqrtpd (%eax), %ymm2 + +// CHECK: vsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] + vsqrtps %ymm1, %ymm2 + +// CHECK: vsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0x10] + vsqrtps (%eax), %ymm2 + +// CHECK: vrsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] + vrsqrtps %ymm1, %ymm2 + +// CHECK: vrsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0x10] + vrsqrtps (%eax), %ymm2 + +// CHECK: vrcpps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] + vrcpps %ymm1, %ymm2 + +// CHECK: vrcpps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0x10] + vrcpps (%eax), %ymm2 + +// CHECK: vandps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] + vandps %ymm2, %ymm4, %ymm6 + +// CHECK: vandpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] + vandpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] + vorps %ymm2, %ymm4, %ymm6 + +// CHECK: vorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] + vorpd %ymm2, %ymm4, %ymm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] + vxorps %ymm2, %ymm4, %ymm6 + +// CHECK: vxorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] + vxorpd %ymm2, %ymm4, %ymm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] + vandnps %ymm2, %ymm4, %ymm6 + +// CHECK: vandnpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] + vandnpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vcvtps2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] + vcvtps2pd %xmm3, %ymm2 + +// CHECK: vcvtps2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] + vcvtps2pd (%eax), %ymm2 + +// CHECK: vcvtdq2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3] + vcvtdq2pd %xmm3, %ymm2 + +// CHECK: vcvtdq2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] + vcvtdq2pd (%eax), %ymm2 + +// CHECK: vcvtdq2ps %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] + vcvtdq2ps %ymm2, %ymm5 + +// CHECK: vcvtdq2ps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] + vcvtdq2ps (%eax), %ymm2 + +// CHECK: vcvtps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] + vcvtps2dq %ymm2, %ymm5 + +// CHECK: vcvtps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] + vcvtps2dq (%eax), %ymm5 + +// CHECK: vcvttps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] + vcvttps2dq %ymm2, %ymm5 + +// CHECK: vcvttps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] + vcvttps2dq (%eax), %ymm5 + +// CHECK: vcvttpd2dq %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dq %xmm1, %xmm5 + +// CHECK: vcvttpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] + vcvttpd2dq %ymm2, %xmm5 + +// CHECK: vcvttpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dqx %xmm1, %xmm5 + +// CHECK: vcvttpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] + vcvttpd2dqx (%eax), %xmm1 + +// CHECK: vcvttpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] + vcvttpd2dqy %ymm2, %xmm1 + +// CHECK: vcvttpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] + vcvttpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2ps %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] + vcvtpd2ps %ymm2, %xmm5 + +// CHECK: vcvtpd2psx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] + vcvtpd2psx %xmm1, %xmm5 + +// CHECK: vcvtpd2psx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] + vcvtpd2psx (%eax), %xmm1 + +// CHECK: vcvtpd2psy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] + vcvtpd2psy %ymm2, %xmm1 + +// CHECK: vcvtpd2psy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] + vcvtpd2psy (%eax), %xmm1 + +// CHECK: vcvtpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xff,0xe6,0xea] + vcvtpd2dq %ymm2, %xmm5 + +// CHECK: vcvtpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0xca] + vcvtpd2dqy %ymm2, %xmm1 + +// CHECK: vcvtpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0x08] + vcvtpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] + vcvtpd2dqx %xmm1, %xmm5 + +// CHECK: vcvtpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] + vcvtpd2dqx (%eax), %xmm1 + +// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] + vcmpeqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] + vcmpleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] + vcmpltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] + vcmpneqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] + vcmpnleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] + vcmpnltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] + vcmpordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] + vcmpunordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] + vcmpeqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02] + vcmplepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] + vcmpltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] + vcmpneqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] + vcmpnlepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] + vcmpnltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] + vcmpordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] + vcmpunordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] + vcmpeq_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] + vcmpngeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] + vcmpngtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] + vcmpfalseps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] + vcmpneq_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] + vcmpgeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] + vcmpgtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] + vcmptrueps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] + vcmpeq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] + vcmplt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] + vcmple_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] + vcmpunord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] + vcmpneq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] + vcmpnlt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] + vcmpnle_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17] + vcmpord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] + vcmpeq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] + vcmpnge_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] + vcmpngt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] + vcmpfalse_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] + vcmpneq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] + vcmpge_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] + vcmpgt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] + vcmptrue_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0xd0,0xd9] + vaddsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf7,0xd0,0x10] + vaddsubps (%eax), %ymm1, %ymm2 + +// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xd0,0xd9] + vaddsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubpd (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf5,0xd0,0x10] + vaddsubpd (%eax), %ymm1, %ymm2 + +// CHECK: vhaddps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0xd9] + vhaddps %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0x18] + vhaddps (%eax), %ymm2, %ymm3 + +// CHECK: vhaddpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0xd9] + vhaddpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0x18] + vhaddpd (%eax), %ymm2, %ymm3 + +// CHECK: vhsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0xd9] + vhsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0x18] + vhsubps (%eax), %ymm2, %ymm3 + +// CHECK: vhsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0xd9] + vhsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0x18] + vhsubpd (%eax), %ymm2, %ymm3 + +// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03] + vblendps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03] + vblendps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03] + vblendpd $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03] + vblendpd $3, (%eax), %ymm5, %ymm1 + +// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03] + vdpps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vdpps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03] + vdpps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vbroadcastf128 (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10] + vbroadcastf128 (%eax), %ymm2 + +// CHECK: vbroadcastsd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10] + vbroadcastsd (%eax), %ymm2 + +// CHECK: vbroadcastss (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10] + vbroadcastss (%eax), %xmm2 + +// CHECK: vbroadcastss (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10] + vbroadcastss (%eax), %ymm2 + +// CHECK: vinsertf128 $7, %xmm2, %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07] + vinsertf128 $7, %xmm2, %ymm2, %ymm5 + +// CHECK: vinsertf128 $7, (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0x28,0x07] + vinsertf128 $7, (%eax), %ymm2, %ymm5 + +// CHECK: vextractf128 $7, %ymm2, %xmm2 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07] + vextractf128 $7, %ymm2, %xmm2 + +// CHECK: vextractf128 $7, %ymm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07] + vextractf128 $7, %ymm2, (%eax) + +// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10] + vmaskmovpd %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10] + vmaskmovpd %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28] + vmaskmovpd (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28] + vmaskmovpd (%eax), %ymm2, %ymm5 + +// CHECK: vmaskmovps %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10] + vmaskmovps %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovps %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10] + vmaskmovps %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovps (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28] + vmaskmovps (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovps (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28] + vmaskmovps (%eax), %ymm2, %ymm5 + +// CHECK: vpermilps $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07] + vpermilps $7, %xmm1, %xmm5 + +// CHECK: vpermilps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07] + vpermilps $7, %ymm5, %ymm1 + +// CHECK: vpermilps $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07] + vpermilps $7, (%eax), %xmm5 + +// CHECK: vpermilps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07] + vpermilps $7, (%eax), %ymm5 + +// CHECK: vpermilps %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9] + vpermilps %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilps %ymm1, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9] + vpermilps %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilps (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18] + vpermilps (%eax), %xmm5, %xmm3 + +// CHECK: vpermilps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08] + vpermilps (%eax), %ymm5, %ymm1 + +// CHECK: vpermilpd $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07] + vpermilpd $7, %xmm1, %xmm5 + +// CHECK: vpermilpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07] + vpermilpd $7, %ymm5, %ymm1 + +// CHECK: vpermilpd $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07] + vpermilpd $7, (%eax), %xmm5 + +// CHECK: vpermilpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07] + vpermilpd $7, (%eax), %ymm5 + +// CHECK: vpermilpd %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9] + vpermilpd %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilpd %ymm1, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9] + vpermilpd %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilpd (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18] + vpermilpd (%eax), %xmm5, %xmm3 + +// CHECK: vpermilpd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08] + vpermilpd (%eax), %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07] + vperm2f128 $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07] + vperm2f128 $7, (%eax), %ymm5, %ymm1 + +// CHECK: vzeroall +// CHECK: encoding: [0xc5,0xfc,0x77] + vzeroall + +// CHECK: vzeroupper +// CHECK: encoding: [0xc5,0xf8,0x77] + vzeroupper + +// CHECK: vcvtsd2si %xmm4, %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc] + vcvtsd2si %xmm4, %ecx + +// CHECK: vcvtsd2si (%ecx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%ecx), %ecx + +// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7 +// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00] + vcvtsi2sdl (%ebp), %xmm0, %xmm7 + +// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7 +// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24] + vcvtsi2sdl (%esp), %xmm0, %xmm7 + +// CHECK: vlddqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0xf0,0x10] + vlddqu (%eax), %ymm2 + +// CHECK: vmovddup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xff,0x12,0xea] + vmovddup %ymm2, %ymm5 + +// CHECK: vmovddup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0x12,0x10] + vmovddup (%eax), %ymm2 + +// CHECK: vmovdqa %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x6f,0xea] + vmovdqa %ymm2, %ymm5 + +// CHECK: vmovdqa %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x7f,0x10] + vmovdqa %ymm2, (%eax) + +// CHECK: vmovdqa (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x6f,0x10] + vmovdqa (%eax), %ymm2 + +// CHECK: vmovdqu %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x6f,0xea] + vmovdqu %ymm2, %ymm5 + +// CHECK: vmovdqu %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfe,0x7f,0x10] + vmovdqu %ymm2, (%eax) + +// CHECK: vmovdqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x6f,0x10] + vmovdqu (%eax), %ymm2 + +// CHECK: vmovshdup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x16,0xea] + vmovshdup %ymm2, %ymm5 + +// CHECK: vmovshdup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x16,0x10] + vmovshdup (%eax), %ymm2 + +// CHECK: vmovsldup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x12,0xea] + vmovsldup %ymm2, %ymm5 + +// CHECK: vmovsldup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x12,0x10] + vmovsldup (%eax), %ymm2 + +// CHECK: vptest %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea] + vptest %ymm2, %ymm5 + +// CHECK: vptest (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10] + vptest (%eax), %ymm2 + +// CHECK: vroundpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07] + vroundpd $7, %ymm5, %ymm1 + +// CHECK: vroundpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07] + vroundpd $7, (%eax), %ymm5 + +// CHECK: vroundps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07] + vroundps $7, %ymm5, %ymm1 + +// CHECK: vroundps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07] + vroundps $7, (%eax), %ymm5 + +// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07] + vshufpd $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufpd $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07] + vshufpd $7, (%eax), %ymm5, %ymm1 + +// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07] + vshufps $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufps $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07] + vshufps $7, (%eax), %ymm5, %ymm1 + +// CHECK: vtestpd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea] + vtestpd %xmm2, %xmm5 + +// CHECK: vtestpd %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea] + vtestpd %ymm2, %ymm5 + +// CHECK: vtestpd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10] + vtestpd (%eax), %xmm2 + +// CHECK: vtestpd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10] + vtestpd (%eax), %ymm2 + +// CHECK: vtestps %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea] + vtestps %xmm2, %xmm5 + +// CHECK: vtestps %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea] + vtestps %ymm2, %ymm5 + +// CHECK: vtestps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10] + vtestps (%eax), %xmm2 + +// CHECK: vtestps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10] + vtestps (%eax), %ymm2 + +// CHECK: vblendvpd %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2 +// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00] + vblendvpd %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2 + diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index ebafb11061ef..ef774239ffe8 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10047,2882 +10047,23 @@ // CHECK: encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00] ficomps 32493 -// CHECK: vaddss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x58,0xd4] - vaddss %xmm4, %xmm6, %xmm2 +// CHECK: movl 57005(,%eiz), %ebx +// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movl 57005(,%eiz), %ebx -// CHECK: vmulss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x59,0xd4] - vmulss %xmm4, %xmm6, %xmm2 +// CHECK: movl 48879(,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movl 48879(,%eiz), %eax -// CHECK: vsubss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5c,0xd4] - vsubss %xmm4, %xmm6, %xmm2 +// CHECK: movl -4(,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movl -4(,%eiz,8), %eax -// CHECK: vdivss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] - vdivss %xmm4, %xmm6, %xmm2 +// CHECK: movl (%ecx,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x21] + movl (%ecx,%eiz), %eax -// CHECK: vaddsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] - vaddsd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] - vmulsd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] - vsubsd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] - vdivsd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] - vaddps %xmm4, %xmm6, %xmm2 - -// CHECK: vsubps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] - vsubps %xmm4, %xmm6, %xmm2 - -// CHECK: vmulps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] - vmulps %xmm4, %xmm6, %xmm2 - -// CHECK: vdivps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] - vdivps %xmm4, %xmm6, %xmm2 - -// CHECK: vaddpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] - vaddpd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] - vsubpd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] - vmulpd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] - vdivpd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: vmaxss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] - vmaxss %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] - vmaxsd %xmm2, %xmm4, %xmm6 - -// CHECK: vminss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] - vminss %xmm2, %xmm4, %xmm6 - -// CHECK: vminsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] - vminsd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] - vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] - vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] - vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] - vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] - vmaxps %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] - vmaxpd %xmm2, %xmm4, %xmm6 - -// CHECK: vminps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] - vminps %xmm2, %xmm4, %xmm6 - -// CHECK: vminpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] - vminpd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] - vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] - vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] - vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] - vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] - vandps %xmm2, %xmm4, %xmm6 - -// CHECK: vandpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] - vandpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] - vorps %xmm2, %xmm4, %xmm6 - -// CHECK: vorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] - vorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] - vxorps %xmm2, %xmm4, %xmm6 - -// CHECK: vxorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] - vxorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] - vandnps %xmm2, %xmm4, %xmm6 - -// CHECK: vandnpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] - vandnpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] - vmovss -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovss %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x10,0xec] - vmovss %xmm4, %xmm2, %xmm5 - -// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] - vmovsd -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovsd %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x10,0xec] - vmovsd %xmm4, %xmm2, %xmm5 - -// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] - vunpckhps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] - vunpckhpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] - vunpcklps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] - vunpcklpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00] - vcmpps $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] - vcmpps $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] - vcmpps $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] - vcmppd $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] - vcmppd $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] - vcmppd $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] - vshufps $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] - vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] - vshufpd $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] - vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] - vcmpeqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] - vcmpleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] - vcmpltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] - vcmpneqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] - vcmpnleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] - vcmpnltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] - vcmpordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] - vcmpunordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] - vcmpeqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] - vcmplepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] - vcmpltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] - vcmpneqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] - vcmpnlepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] - vcmpnltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] - vcmpordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] - vcmpunordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] - vcmpeqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] - vcmpless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] - vcmpltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] - vcmpneqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] - vcmpnless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] - vcmpnltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] - vcmpordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] - vcmpunordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] - vcmpeqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] - vcmplesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] - vcmpltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] - vcmpneqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] - vcmpnlesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] - vcmpnltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] - vcmpordsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] - vcmpunordsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vucomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] - vucomiss %xmm1, %xmm2 - -// CHECK: vucomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] - vucomiss (%eax), %xmm2 - -// CHECK: vcomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] - vcomiss %xmm1, %xmm2 - -// CHECK: vcomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] - vcomiss (%eax), %xmm2 - -// CHECK: vucomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] - vucomisd %xmm1, %xmm2 - -// CHECK: vucomisd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] - vucomisd (%eax), %xmm2 - -// CHECK: vcomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] - vcomisd %xmm1, %xmm2 - -// CHECK: vcomisd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] - vcomisd (%eax), %xmm2 - -// CHECK: vcvttss2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] - vcvttss2si %xmm1, %eax - -// CHECK: vcvttss2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%ecx), %eax - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvttsd2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] - vcvttsd2si %xmm1, %eax - -// CHECK: vcvttsd2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%ecx), %eax - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vmovaps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0x10] - vmovaps (%eax), %xmm2 - -// CHECK: vmovaps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] - vmovaps %xmm1, %xmm2 - -// CHECK: vmovaps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x29,0x08] - vmovaps %xmm1, (%eax) - -// CHECK: vmovapd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0x10] - vmovapd (%eax), %xmm2 - -// CHECK: vmovapd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] - vmovapd %xmm1, %xmm2 - -// CHECK: vmovapd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x29,0x08] - vmovapd %xmm1, (%eax) - -// CHECK: vmovups (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0x10] - vmovups (%eax), %xmm2 - -// CHECK: vmovups %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] - vmovups %xmm1, %xmm2 - -// CHECK: vmovups %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x11,0x08] - vmovups %xmm1, (%eax) - -// CHECK: vmovupd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x10,0x10] - vmovupd (%eax), %xmm2 - -// CHECK: vmovupd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x10,0xd1] - vmovupd %xmm1, %xmm2 - -// CHECK: vmovupd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x11,0x08] - vmovupd %xmm1, (%eax) - -// CHECK: vmovlps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x13,0x08] - vmovlps %xmm1, (%eax) - -// CHECK: vmovlps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0x18] - vmovlps (%eax), %xmm2, %xmm3 - -// CHECK: vmovlpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x13,0x08] - vmovlpd %xmm1, (%eax) - -// CHECK: vmovlpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x12,0x18] - vmovlpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovhps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x17,0x08] - vmovhps %xmm1, (%eax) - -// CHECK: vmovhps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0x18] - vmovhps (%eax), %xmm2, %xmm3 - -// CHECK: vmovhpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x17,0x08] - vmovhpd %xmm1, (%eax) - -// CHECK: vmovhpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x16,0x18] - vmovhpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] - vmovlhps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] - vmovhlps %xmm1, %xmm2, %xmm3 - -// CHECK: vcvtss2sil %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] - vcvtss2si %xmm1, %eax - -// CHECK: vcvtss2sil (%eax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%eax), %ebx - -// CHECK: vcvtdq2ps %xmm5, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] - vcvtdq2ps %xmm5, %xmm6 - -// CHECK: vcvtdq2ps (%eax), %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] - vcvtdq2ps (%eax), %xmm6 - -// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] - vcvtsd2ss %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] - vcvtsd2ss (%eax), %xmm4, %xmm6 - -// CHECK: vcvtps2dq %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5b,0xda] - vcvtps2dq %xmm2, %xmm3 - -// CHECK: vcvtps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] - vcvtps2dq (%eax), %xmm3 - -// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] - vcvtss2sd %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0x30] - vcvtss2sd (%eax), %xmm4, %xmm6 - -// CHECK: vcvtdq2ps %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] - vcvtdq2ps %xmm4, %xmm6 - -// CHECK: vcvtdq2ps (%ecx), %xmm4 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] - vcvtdq2ps (%ecx), %xmm4 - -// CHECK: vcvttps2dq %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] - vcvttps2dq %xmm2, %xmm3 - -// CHECK: vcvttps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] - vcvttps2dq (%eax), %xmm3 - -// CHECK: vcvtps2pd %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0xda] - vcvtps2pd %xmm2, %xmm3 - -// CHECK: vcvtps2pd (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] - vcvtps2pd (%eax), %xmm3 - -// CHECK: vcvtpd2ps %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] - vcvtpd2ps %xmm2, %xmm3 - -// CHECK: vsqrtpd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] - vsqrtpd %xmm1, %xmm2 - -// CHECK: vsqrtpd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0x10] - vsqrtpd (%eax), %xmm2 - -// CHECK: vsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] - vsqrtps %xmm1, %xmm2 - -// CHECK: vsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0x10] - vsqrtps (%eax), %xmm2 - -// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] - vsqrtsd %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0x18] - vsqrtsd (%eax), %xmm2, %xmm3 - -// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0xd9] - vsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0x18] - vsqrtss (%eax), %xmm2, %xmm3 - -// CHECK: vrsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] - vrsqrtps %xmm1, %xmm2 - -// CHECK: vrsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0x10] - vrsqrtps (%eax), %xmm2 - -// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0xd9] - vrsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0x18] - vrsqrtss (%eax), %xmm2, %xmm3 - -// CHECK: vrcpps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] - vrcpps %xmm1, %xmm2 - -// CHECK: vrcpps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0x10] - vrcpps (%eax), %xmm2 - -// CHECK: vrcpss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0xd9] - vrcpss %xmm1, %xmm2, %xmm3 - -// CHECK: vrcpss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0x18] - vrcpss (%eax), %xmm2, %xmm3 - -// CHECK: vmovntdq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] - vmovntdq %xmm1, (%eax) - -// CHECK: vmovntpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] - vmovntpd %xmm1, (%eax) - -// CHECK: vmovntps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] - vmovntps %xmm1, (%eax) - -// CHECK: vldmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x10] - vldmxcsr (%eax) - -// CHECK: vstmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x18] - vstmxcsr (%eax) - -// CHECK: vldmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] - vldmxcsr 0xdeadbeef - -// CHECK: vstmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] - vstmxcsr 0xdeadbeef - -// CHECK: vpsubb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] - vpsubb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] - vpsubb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] - vpsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf9,0x18] - vpsubw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] - vpsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] - vpsubd (%eax), %xmm2, %xmm3 - -// CHECK: vpsubq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] - vpsubq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] - vpsubq (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] - vpsubsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] - vpsubsb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] - vpsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] - vpsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] - vpsubusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] - vpsubusb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] - vpsubusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] - vpsubusw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] - vpaddb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] - vpaddb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] - vpaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] - vpaddw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] - vpaddd %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] - vpaddd (%eax), %xmm2, %xmm3 - -// CHECK: vpaddq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] - vpaddq %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] - vpaddq (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] - vpaddsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0x18] - vpaddsb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] - vpaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0x18] - vpaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9] - vpaddusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] - vpaddusb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] - vpaddusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] - vpaddusw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] - vpmulhuw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] - vpmulhuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] - vpmulhw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] - vpmulhw (%eax), %xmm2, %xmm3 - -// CHECK: vpmullw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] - vpmullw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmullw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] - vpmullw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuludq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] - vpmuludq %xmm1, %xmm2, %xmm3 - -// CHECK: vpmuludq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] - vpmuludq (%eax), %xmm2, %xmm3 - -// CHECK: vpavgb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] - vpavgb %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] - vpavgb (%eax), %xmm2, %xmm3 - -// CHECK: vpavgw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] - vpavgw %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] - vpavgw (%eax), %xmm2, %xmm3 - -// CHECK: vpminsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] - vpminsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpminsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0x18] - vpminsw (%eax), %xmm2, %xmm3 - -// CHECK: vpminub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] - vpminub %xmm1, %xmm2, %xmm3 - -// CHECK: vpminub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0x18] - vpminub (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] - vpmaxsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0x18] - vpmaxsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] - vpmaxub %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0x18] - vpmaxub (%eax), %xmm2, %xmm3 - -// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] - vpsadbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsadbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] - vpsadbw (%eax), %xmm2, %xmm3 - -// CHECK: vpsllw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] - vpsllw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] - vpsllw (%eax), %xmm2, %xmm3 - -// CHECK: vpslld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] - vpslld %xmm1, %xmm2, %xmm3 - -// CHECK: vpslld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] - vpslld (%eax), %xmm2, %xmm3 - -// CHECK: vpsllq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] - vpsllq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] - vpsllq (%eax), %xmm2, %xmm3 - -// CHECK: vpsraw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] - vpsraw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsraw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] - vpsraw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrad %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] - vpsrad %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrad (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] - vpsrad (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] - vpsrlw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] - vpsrlw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] - vpsrld %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] - vpsrld (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] - vpsrlq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] - vpsrlq (%eax), %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpslldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] - vpslldq $10, %xmm2, %xmm3 - -// CHECK: vpsllq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] - vpsllq $10, %xmm2, %xmm3 - -// CHECK: vpsllw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] - vpsllw $10, %xmm2, %xmm3 - -// CHECK: vpsrad $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] - vpsrad $10, %xmm2, %xmm3 - -// CHECK: vpsraw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] - vpsraw $10, %xmm2, %xmm3 - -// CHECK: vpsrld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] - vpsrld $10, %xmm2, %xmm3 - -// CHECK: vpsrldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] - vpsrldq $10, %xmm2, %xmm3 - -// CHECK: vpsrlq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] - vpsrlq $10, %xmm2, %xmm3 - -// CHECK: vpsrlw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] - vpsrlw $10, %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpand %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] - vpand %xmm1, %xmm2, %xmm3 - -// CHECK: vpand (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] - vpand (%eax), %xmm2, %xmm3 - -// CHECK: vpor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] - vpor %xmm1, %xmm2, %xmm3 - -// CHECK: vpor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] - vpor (%eax), %xmm2, %xmm3 - -// CHECK: vpxor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] - vpxor %xmm1, %xmm2, %xmm3 - -// CHECK: vpxor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0x18] - vpxor (%eax), %xmm2, %xmm3 - -// CHECK: vpandn %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] - vpandn %xmm1, %xmm2, %xmm3 - -// CHECK: vpandn (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] - vpandn (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] - vpcmpeqb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0x18] - vpcmpeqb (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] - vpcmpeqw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0x18] - vpcmpeqw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] - vpcmpeqd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0x18] - vpcmpeqd (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] - vpcmpgtb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0x18] - vpcmpgtb (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] - vpcmpgtw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0x18] - vpcmpgtw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] - vpcmpgtd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0x18] - vpcmpgtd (%eax), %xmm2, %xmm3 - -// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] - vpacksswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpacksswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0x18] - vpacksswb (%eax), %xmm2, %xmm3 - -// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] - vpackssdw %xmm1, %xmm2, %xmm3 - -// CHECK: vpackssdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] - vpackssdw (%eax), %xmm2, %xmm3 - -// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] - vpackuswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpackuswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0x18] - vpackuswb (%eax), %xmm2, %xmm3 - -// CHECK: vpshufd $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] - vpshufd $4, %xmm2, %xmm3 - -// CHECK: vpshufd $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] - vpshufd $4, (%eax), %xmm3 - -// CHECK: vpshufhw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] - vpshufhw $4, %xmm2, %xmm3 - -// CHECK: vpshufhw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] - vpshufhw $4, (%eax), %xmm3 - -// CHECK: vpshuflw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] - vpshuflw $4, %xmm2, %xmm3 - -// CHECK: vpshuflw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] - vpshuflw $4, (%eax), %xmm3 - -// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] - vpunpcklbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0x18] - vpunpcklbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] - vpunpcklwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0x18] - vpunpcklwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] - vpunpckldq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0x18] - vpunpckldq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] - vpunpcklqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] - vpunpcklqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] - vpunpckhbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0x18] - vpunpckhbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] - vpunpckhwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0x18] - vpunpckhwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] - vpunpckhdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] - vpunpckhdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] - vpunpckhqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] - vpunpckhqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm3 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm3 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpmovmskb %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] - vpmovmskb %xmm1, %eax - -// CHECK: vmaskmovdqu %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] - vmaskmovdqu %xmm1, %xmm2 - -// CHECK: vmovd %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] - vmovd %xmm1, %eax - -// CHECK: vmovd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] - vmovd %xmm1, (%eax) - -// CHECK: vmovd %eax, %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] - vmovd %eax, %xmm1 - -// CHECK: vmovd (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] - vmovd (%eax), %xmm1 - -// CHECK: vmovq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] - vmovq %xmm1, (%eax) - -// CHECK: vmovq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] - vmovq %xmm1, %xmm2 - -// CHECK: vmovq (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] - vmovq (%eax), %xmm1 - -// CHECK: vcvtpd2dq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] - vcvtpd2dq %xmm1, %xmm2 - -// CHECK: vcvtdq2pd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] - vcvtdq2pd %xmm1, %xmm2 - -// CHECK: vcvtdq2pd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] - vcvtdq2pd (%eax), %xmm2 - -// CHECK: vmovshdup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] - vmovshdup %xmm1, %xmm2 - -// CHECK: vmovshdup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0x10] - vmovshdup (%eax), %xmm2 - -// CHECK: vmovsldup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] - vmovsldup %xmm1, %xmm2 - -// CHECK: vmovsldup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0x10] - vmovsldup (%eax), %xmm2 - -// CHECK: vmovddup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] - vmovddup %xmm1, %xmm2 - -// CHECK: vmovddup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0x10] - vmovddup (%eax), %xmm2 - -// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] - vaddsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubps (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] - vaddsubps (%eax), %xmm1, %xmm2 - -// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] - vaddsubpd %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] - vaddsubpd (%eax), %xmm1, %xmm2 - -// CHECK: vhaddps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] - vhaddps %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] - vhaddps (%eax), %xmm2, %xmm3 - -// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] - vhaddpd %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] - vhaddpd (%eax), %xmm2, %xmm3 - -// CHECK: vhsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] - vhsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vhsubps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] - vhsubps (%eax), %xmm2, %xmm3 - -// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] - vhsubpd %xmm1, %xmm2, %xmm3 - -// CHECK: vhsubpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] - vhsubpd (%eax), %xmm2, %xmm3 - -// CHECK: vpabsb %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] - vpabsb %xmm1, %xmm2 - -// CHECK: vpabsb (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] - vpabsb (%eax), %xmm2 - -// CHECK: vpabsw %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] - vpabsw %xmm1, %xmm2 - -// CHECK: vpabsw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] - vpabsw (%eax), %xmm2 - -// CHECK: vpabsd %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] - vpabsd %xmm1, %xmm2 - -// CHECK: vpabsd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] - vpabsd (%eax), %xmm2 - -// CHECK: vphaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] - vphaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] - vphaddw (%eax), %xmm2, %xmm3 - -// CHECK: vphaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] - vphaddd %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] - vphaddd (%eax), %xmm2, %xmm3 - -// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] - vphaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] - vphaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] - vphsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] - vphsubw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] - vphsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] - vphsubd (%eax), %xmm2, %xmm3 - -// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9] - vphsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] - vphsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] - vpmaddubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] - vpmaddubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpshufb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] - vpshufb %xmm1, %xmm2, %xmm3 - -// CHECK: vpshufb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] - vpshufb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] - vpsignb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18] - vpsignb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] - vpsignw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] - vpsignw (%eax), %xmm2, %xmm3 - -// CHECK: vpsignd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] - vpsignd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] - vpsignd (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] - vpmulhrsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] - vpmulhrsw (%eax), %xmm2, %xmm3 - -// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] - vpalignr $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] - vpalignr $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07] - vroundsd $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] - vroundsd $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] - vroundss $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] - vroundss $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundpd $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] - vroundpd $7, %xmm2, %xmm3 - -// CHECK: vroundpd $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] - vroundpd $7, (%eax), %xmm3 - -// CHECK: vroundps $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] - vroundps $7, %xmm2, %xmm3 - -// CHECK: vroundps $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] - vroundps $7, (%eax), %xmm3 - -// CHECK: vphminposuw %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] - vphminposuw %xmm2, %xmm3 - -// CHECK: vphminposuw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] - vphminposuw (%eax), %xmm2 - -// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] - vpackusdw %xmm2, %xmm3, %xmm1 - -// CHECK: vpackusdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] - vpackusdw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] - vpcmpeqq %xmm2, %xmm3, %xmm1 - -// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] - vpcmpeqq (%eax), %xmm2, %xmm3 - -// CHECK: vpminsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] - vpminsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] - vpminsb (%eax), %xmm2, %xmm3 - -// CHECK: vpminsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca] - vpminsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] - vpminsd (%eax), %xmm2, %xmm3 - -// CHECK: vpminud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] - vpminud %xmm2, %xmm3, %xmm1 - -// CHECK: vpminud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] - vpminud (%eax), %xmm2, %xmm3 - -// CHECK: vpminuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] - vpminuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpminuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] - vpminuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] - vpmaxsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] - vpmaxsb (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] - vpmaxsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] - vpmaxsd (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] - vpmaxud %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] - vpmaxud (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] - vpmaxuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] - vpmaxuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] - vpmuldq %xmm2, %xmm3, %xmm1 - -// CHECK: vpmuldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] - vpmuldq (%eax), %xmm2, %xmm3 - -// CHECK: vpmulld %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] - vpmulld %xmm2, %xmm5, %xmm1 - -// CHECK: vpmulld (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] - vpmulld (%eax), %xmm5, %xmm3 - -// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] - vblendps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] - vblendps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] - vblendpd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] - vblendpd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] - vpblendw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] - vpblendw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] - vmpsadbw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] - vmpsadbw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] - vdpps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] - vdpps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] - vdppd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] - vdppd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] - vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] - vblendvpd %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] - vblendvps %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] - vblendvps %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] - vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] - vpblendvb %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpmovsxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] - vpmovsxbw %xmm2, %xmm5 - -// CHECK: vpmovsxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] - vpmovsxbw (%eax), %xmm2 - -// CHECK: vpmovsxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] - vpmovsxwd %xmm2, %xmm5 - -// CHECK: vpmovsxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] - vpmovsxwd (%eax), %xmm2 - -// CHECK: vpmovsxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] - vpmovsxdq %xmm2, %xmm5 - -// CHECK: vpmovsxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] - vpmovsxdq (%eax), %xmm2 - -// CHECK: vpmovzxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] - vpmovzxbw %xmm2, %xmm5 - -// CHECK: vpmovzxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] - vpmovzxbw (%eax), %xmm2 - -// CHECK: vpmovzxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] - vpmovzxwd %xmm2, %xmm5 - -// CHECK: vpmovzxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] - vpmovzxwd (%eax), %xmm2 - -// CHECK: vpmovzxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] - vpmovzxdq %xmm2, %xmm5 - -// CHECK: vpmovzxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] - vpmovzxdq (%eax), %xmm2 - -// CHECK: vpmovsxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] - vpmovsxbq %xmm2, %xmm5 - -// CHECK: vpmovsxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] - vpmovsxbq (%eax), %xmm2 - -// CHECK: vpmovzxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] - vpmovzxbq %xmm2, %xmm5 - -// CHECK: vpmovzxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] - vpmovzxbq (%eax), %xmm2 - -// CHECK: vpmovsxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] - vpmovsxbd %xmm2, %xmm5 - -// CHECK: vpmovsxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] - vpmovsxbd (%eax), %xmm2 - -// CHECK: vpmovsxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] - vpmovsxwq %xmm2, %xmm5 - -// CHECK: vpmovsxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] - vpmovsxwq (%eax), %xmm2 - -// CHECK: vpmovzxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] - vpmovzxbd %xmm2, %xmm5 - -// CHECK: vpmovzxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] - vpmovzxbd (%eax), %xmm2 - -// CHECK: vpmovzxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] - vpmovzxwq %xmm2, %xmm5 - -// CHECK: vpmovzxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] - vpmovzxwq (%eax), %xmm2 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpextrw $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] - vpextrw $7, %xmm2, (%eax) - -// CHECK: vpextrd $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] - vpextrd $7, %xmm2, %eax - -// CHECK: vpextrd $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] - vpextrd $7, %xmm2, (%eax) - -// CHECK: vpextrb $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] - vpextrb $7, %xmm2, %eax - -// CHECK: vpextrb $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] - vpextrb $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] - vextractps $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] - vextractps $7, %xmm2, %eax - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] - vpinsrb $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] - vpinsrb $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] - vpinsrd $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] - vpinsrd $7, (%eax), %xmm2, %xmm5 - -// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] - vinsertps $7, %xmm2, %xmm5, %xmm1 - -// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] - vinsertps $7, (%eax), %xmm5, %xmm1 - -// CHECK: vptest %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] - vptest %xmm2, %xmm5 - -// CHECK: vptest (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] - vptest (%eax), %xmm2 - -// CHECK: vmovntdqa (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] - vmovntdqa (%eax), %xmm2 - -// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] - vpcmpgtq %xmm2, %xmm5, %xmm1 - -// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] - vpcmpgtq (%eax), %xmm5, %xmm3 - -// CHECK: vpcmpistrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] - vpcmpistrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpistrm $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] - vpcmpistrm $7, (%eax), %xmm5 - -// CHECK: vpcmpestrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] - vpcmpestrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpestrm $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] - vpcmpestrm $7, (%eax), %xmm5 - -// CHECK: vpcmpistri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] - vpcmpistri $7, %xmm2, %xmm5 - -// CHECK: vpcmpistri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] - vpcmpistri $7, (%eax), %xmm5 - -// CHECK: vpcmpestri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] - vpcmpestri $7, %xmm2, %xmm5 - -// CHECK: vpcmpestri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] - vpcmpestri $7, (%eax), %xmm5 - -// CHECK: vaesimc %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] - vaesimc %xmm2, %xmm5 - -// CHECK: vaesimc (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] - vaesimc (%eax), %xmm2 - -// CHECK: vaesenc %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] - vaesenc %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenc (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] - vaesenc (%eax), %xmm5, %xmm3 - -// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] - vaesenclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] - vaesenclast (%eax), %xmm5, %xmm3 - -// CHECK: vaesdec %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] - vaesdec %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdec (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] - vaesdec (%eax), %xmm5, %xmm3 - -// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] - vaesdeclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] - vaesdeclast (%eax), %xmm5, %xmm3 - -// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] - vaeskeygenassist $7, %xmm2, %xmm5 - -// CHECK: vaeskeygenassist $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] - vaeskeygenassist $7, (%eax), %xmm5 - -// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] - vcmpeq_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] - vcmpngeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] - vcmpngtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] - vcmpfalseps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] - vcmpneq_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] - vcmpgeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] - vcmpgtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] - vcmptrueps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] - vcmpeq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] - vcmplt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] - vcmple_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] - vcmpunord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] - vcmpneq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15] - vcmpnlt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] - vcmpnle_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] - vcmpord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] - vcmpeq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] - vcmpnge_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] - vcmpngt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] - vcmpfalse_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c] - vcmpneq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] - vcmpge_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] - vcmpgt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] - vcmptrue_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovaps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0x10] - vmovaps (%eax), %ymm2 - -// CHECK: vmovaps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] - vmovaps %ymm1, %ymm2 - -// CHECK: vmovaps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x29,0x08] - vmovaps %ymm1, (%eax) - -// CHECK: vmovapd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0x10] - vmovapd (%eax), %ymm2 - -// CHECK: vmovapd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] - vmovapd %ymm1, %ymm2 - -// CHECK: vmovapd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x29,0x08] - vmovapd %ymm1, (%eax) - -// CHECK: vmovups (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x10,0x10] - vmovups (%eax), %ymm2 - -// CHECK: vmovups %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] - vmovups %ymm1, %ymm2 - -// CHECK: vmovups %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x11,0x08] - vmovups %ymm1, (%eax) - -// CHECK: vmovupd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0x10] - vmovupd (%eax), %ymm2 - -// CHECK: vmovupd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] - vmovupd %ymm1, %ymm2 - -// CHECK: vmovupd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x11,0x08] - vmovupd %ymm1, (%eax) - -// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x15,0xe1] - vunpckhps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x15,0xe1] - vunpckhpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x14,0xe1] - vunpcklps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x14,0xe1] - vunpcklpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vmovntdq %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] - vmovntdq %ymm1, (%eax) - -// CHECK: vmovntpd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] - vmovntpd %ymm1, (%eax) - -// CHECK: vmovntps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] - vmovntps %ymm1, (%eax) - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vmaxps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] - vmaxps %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] - vmaxpd %ymm2, %ymm4, %ymm6 - -// CHECK: vminps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] - vminps %ymm2, %ymm4, %ymm6 - -// CHECK: vminpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] - vminpd %ymm2, %ymm4, %ymm6 - -// CHECK: vsubps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] - vsubps %ymm2, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] - vsubpd %ymm2, %ymm4, %ymm6 - -// CHECK: vdivps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2] - vdivps %ymm2, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] - vdivpd %ymm2, %ymm4, %ymm6 - -// CHECK: vaddps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] - vaddps %ymm2, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] - vaddpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmulps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] - vmulps %ymm2, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] - vmulpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%eax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%eax), %ymm4, %ymm6 - -// CHECK: vminps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%eax), %ymm4, %ymm6 - -// CHECK: vminpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%eax), %ymm4, %ymm6 - -// CHECK: vsubps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%eax), %ymm4, %ymm6 - -// CHECK: vsubpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%eax), %ymm4, %ymm6 - -// CHECK: vdivps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%eax), %ymm4, %ymm6 - -// CHECK: vdivpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%eax), %ymm4, %ymm6 - -// CHECK: vaddps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%eax), %ymm4, %ymm6 - -// CHECK: vaddpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%eax), %ymm4, %ymm6 - -// CHECK: vmulps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%eax), %ymm4, %ymm6 - -// CHECK: vmulpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%eax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] - vsqrtpd %ymm1, %ymm2 - -// CHECK: vsqrtpd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0x10] - vsqrtpd (%eax), %ymm2 - -// CHECK: vsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] - vsqrtps %ymm1, %ymm2 - -// CHECK: vsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0x10] - vsqrtps (%eax), %ymm2 - -// CHECK: vrsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] - vrsqrtps %ymm1, %ymm2 - -// CHECK: vrsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0x10] - vrsqrtps (%eax), %ymm2 - -// CHECK: vrcpps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] - vrcpps %ymm1, %ymm2 - -// CHECK: vrcpps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0x10] - vrcpps (%eax), %ymm2 - -// CHECK: vandps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] - vandps %ymm2, %ymm4, %ymm6 - -// CHECK: vandpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] - vandpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] - vorps %ymm2, %ymm4, %ymm6 - -// CHECK: vorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] - vorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] - vxorps %ymm2, %ymm4, %ymm6 - -// CHECK: vxorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] - vxorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] - vandnps %ymm2, %ymm4, %ymm6 - -// CHECK: vandnpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] - vandnpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vcvtps2pd %xmm3, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] - vcvtps2pd %xmm3, %ymm2 - -// CHECK: vcvtps2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] - vcvtps2pd (%eax), %ymm2 - -// CHECK: vcvtdq2pd %xmm3, %ymm2 -// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3] - vcvtdq2pd %xmm3, %ymm2 - -// CHECK: vcvtdq2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] - vcvtdq2pd (%eax), %ymm2 - -// CHECK: vcvtdq2ps %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] - vcvtdq2ps %ymm2, %ymm5 - -// CHECK: vcvtdq2ps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] - vcvtdq2ps (%eax), %ymm2 - -// CHECK: vcvtps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] - vcvtps2dq %ymm2, %ymm5 - -// CHECK: vcvtps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] - vcvtps2dq (%eax), %ymm5 - -// CHECK: vcvttps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] - vcvttps2dq %ymm2, %ymm5 - -// CHECK: vcvttps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] - vcvttps2dq (%eax), %ymm5 - -// CHECK: vcvttpd2dq %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dq %xmm1, %xmm5 - -// CHECK: vcvttpd2dq %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] - vcvttpd2dq %ymm2, %xmm5 - -// CHECK: vcvttpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dqx %xmm1, %xmm5 - -// CHECK: vcvttpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] - vcvttpd2dqx (%eax), %xmm1 - -// CHECK: vcvttpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] - vcvttpd2dqy %ymm2, %xmm1 - -// CHECK: vcvttpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] - vcvttpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2ps %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] - vcvtpd2ps %ymm2, %xmm5 - -// CHECK: vcvtpd2psx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] - vcvtpd2psx %xmm1, %xmm5 - -// CHECK: vcvtpd2psx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] - vcvtpd2psx (%eax), %xmm1 - -// CHECK: vcvtpd2psy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] - vcvtpd2psy %ymm2, %xmm1 - -// CHECK: vcvtpd2psy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] - vcvtpd2psy (%eax), %xmm1 - -// CHECK: vcvtpd2dq %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xff,0xe6,0xea] - vcvtpd2dq %ymm2, %xmm5 - -// CHECK: vcvtpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0xca] - vcvtpd2dqy %ymm2, %xmm1 - -// CHECK: vcvtpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0x08] - vcvtpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] - vcvtpd2dqx %xmm1, %xmm5 - -// CHECK: vcvtpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] - vcvtpd2dqx (%eax), %xmm1 - -// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] - vcmpeqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] - vcmpleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] - vcmpltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] - vcmpneqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] - vcmpnleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] - vcmpnltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] - vcmpordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] - vcmpunordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] - vcmpeqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02] - vcmplepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] - vcmpltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] - vcmpneqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] - vcmpnlepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] - vcmpnltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] - vcmpordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] - vcmpunordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] - vcmpeq_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] - vcmpngeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] - vcmpngtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] - vcmpfalseps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] - vcmpneq_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] - vcmpgeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] - vcmpgtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] - vcmptrueps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] - vcmpeq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] - vcmplt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] - vcmple_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] - vcmpunord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] - vcmpneq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] - vcmpnlt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] - vcmpnle_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17] - vcmpord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] - vcmpeq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] - vcmpnge_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] - vcmpngt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] - vcmpfalse_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] - vcmpneq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] - vcmpge_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] - vcmpgt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] - vcmptrue_usps %ymm1, %ymm2, %ymm3 +// CHECK: movl (%ecx,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe1] + movl (%ecx,%eiz,8), %eax diff --git a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s new file mode 100644 index 000000000000..db7efecfb51b --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca] + vfmaddsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08] + vfmaddsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca] + vfmaddsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08] + vfmaddsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca] + vfmaddsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08] + vfmaddsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca] + vfmaddsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08] + vfmaddsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca] + vfmaddsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08] + vfmaddsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca] + vfmaddsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08] + vfmaddsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca] + vfmsubadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08] + vfmsubadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca] + vfmsubadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08] + vfmsubadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca] + vfmsubadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08] + vfmsubadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca] + vfmsubadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08] + vfmsubadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca] + vfmsubadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08] + vfmsubadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca] + vfmsubadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08] + vfmsubadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca] + vfmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08] + vfmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca] + vfmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08] + vfmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca] + vfmsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08] + vfmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca] + vfmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08] + vfmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca] + vfmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08] + vfmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca] + vfmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08] + vfmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca] + vfnmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0x08] + vfnmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca] + vfnmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08] + vfnmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca] + vfnmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08] + vfnmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca] + vfnmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08] + vfnmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca] + vfnmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08] + vfnmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca] + vfnmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08] + vfnmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca] + vfnmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08] + vfnmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca] + vfnmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08] + vfnmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca] + vfnmsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08] + vfnmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca] + vfnmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08] + vfnmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca] + vfnmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08] + vfnmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca] + vfnmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08] + vfnmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca] + vfmaddsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08] + vfmaddsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca] + vfmaddsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08] + vfmaddsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca] + vfmaddsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08] + vfmaddsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca] + vfmaddsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08] + vfmaddsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca] + vfmaddsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08] + vfmaddsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca] + vfmaddsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0x08] + vfmaddsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca] + vfmsubadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08] + vfmsubadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca] + vfmsubadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08] + vfmsubadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca] + vfmsubadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08] + vfmsubadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca] + vfmsubadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08] + vfmsubadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca] + vfmsubadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08] + vfmsubadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca] + vfmsubadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08] + vfmsubadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca] + vfmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08] + vfmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0xca] + vfmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08] + vfmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca] + vfmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08] + vfmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca] + vfmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08] + vfmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca] + vfmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08] + vfmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca] + vfmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08] + vfmsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca] + vfnmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08] + vfnmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca] + vfnmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08] + vfnmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca] + vfnmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08] + vfnmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca] + vfnmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08] + vfnmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca] + vfnmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08] + vfnmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca] + vfnmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08] + vfnmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca] + vfnmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08] + vfnmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca] + vfnmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08] + vfnmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca] + vfnmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08] + vfnmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca] + vfnmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08] + vfnmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca] + vfnmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08] + vfnmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca] + vfnmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08] + vfnmsub231ps (%eax), %ymm5, %ymm1 + diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s index e97e4940a42c..e3aa1887ef81 100644 --- a/test/MC/AsmParser/X86/x86_32-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_32-new-encoder.s @@ -415,3 +415,11 @@ retl // CHECK: encoding: [0x61] popal +// CHECK: jmpl *8(%eax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%eax) + +// PR7465 +// CHECK: lcalll $2, $4660 +// CHECK: encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00] +lcalll $0x2, $0x1234 diff --git a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s new file mode 100644 index 000000000000..67e82c6cd0d2 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulhqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulhqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $1, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01] + vpclmulhqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $1, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01] + vpclmulhqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $16, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10] + vpclmullqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $16, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x10] + vpclmullqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $0, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00] + vpclmullqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $0, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00] + vpclmullqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulqdq $17, %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulqdq $17, (%rax), %xmm10, %xmm13 + diff --git a/test/MC/AsmParser/X86/x86_64-avx-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-encoding.s new file mode 100644 index 000000000000..7a96bb5a2b48 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-encoding.s @@ -0,0 +1,3318 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] +vaddss %xmm8, %xmm9, %xmm10 + +// CHECK: vmulss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] +vmulss %xmm8, %xmm9, %xmm10 + +// CHECK: vsubss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] +vsubss %xmm8, %xmm9, %xmm10 + +// CHECK: vdivss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] +vdivss %xmm8, %xmm9, %xmm10 + +// CHECK: vaddsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] +vaddsd %xmm8, %xmm9, %xmm10 + +// CHECK: vmulsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] +vmulsd %xmm8, %xmm9, %xmm10 + +// CHECK: vsubsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] +vsubsd %xmm8, %xmm9, %xmm10 + +// CHECK: vdivsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] +vdivsd %xmm8, %xmm9, %xmm10 + +// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc] +vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] +vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] +vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] +vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] +vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] +vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] +vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] +vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] +vaddps %xmm10, %xmm11, %xmm15 + +// CHECK: vsubps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] +vsubps %xmm10, %xmm11, %xmm15 + +// CHECK: vmulps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] +vmulps %xmm10, %xmm11, %xmm15 + +// CHECK: vdivps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] +vdivps %xmm10, %xmm11, %xmm15 + +// CHECK: vaddpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] +vaddpd %xmm10, %xmm11, %xmm15 + +// CHECK: vsubpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] +vsubpd %xmm10, %xmm11, %xmm15 + +// CHECK: vmulpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] +vmulpd %xmm10, %xmm11, %xmm15 + +// CHECK: vdivpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa] +vdivpd %xmm10, %xmm11, %xmm15 + +// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] +vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] +vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] +vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] +vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc] +vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] +vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] +vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] +vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmaxss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] + vmaxss %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] + vmaxsd %xmm10, %xmm14, %xmm12 + +// CHECK: vminss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] + vminss %xmm10, %xmm14, %xmm12 + +// CHECK: vminsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] + vminsd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] + vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] + vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] + vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] + vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] + vmaxps %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] + vmaxpd %xmm10, %xmm14, %xmm12 + +// CHECK: vminps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] + vminps %xmm10, %xmm14, %xmm12 + +// CHECK: vminpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] + vminpd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] + vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] + vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] + vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] + vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] + vandps %xmm10, %xmm14, %xmm12 + +// CHECK: vandpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] + vandpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2] + vorps %xmm10, %xmm14, %xmm12 + +// CHECK: vorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] + vorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] + vxorps %xmm10, %xmm14, %xmm12 + +// CHECK: vxorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] + vxorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] + vandnps %xmm10, %xmm14, %xmm12 + +// CHECK: vandnpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] + vandnpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] + vmovss -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovss %xmm14, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] + vmovss %xmm14, %xmm10, %xmm15 + +// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] + vmovsd -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovsd %xmm14, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe] + vmovsd %xmm14, %xmm10, %xmm15 + +// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] + vunpckhps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] + vunpckhpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] + vunpcklps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] + vunpcklpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] + vcmpps $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] + vcmpps $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] + vcmpps $7, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] + vcmppd $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] + vcmppd $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] + vcmppd $7, %xmm10, %xmm12, %xmm15 + +// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] + vshufps $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] + vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] + vshufpd $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] + vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] + vcmpeqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] + vcmpleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] + vcmpltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] + vcmpneqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] + vcmpnleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] + vcmpnltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] + vcmpordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] + vcmpunordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] + vcmpeqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] + vcmplepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] + vcmpltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] + vcmpneqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] + vcmpnlepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] + vcmpnltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] + vcmpordpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] + vcmpunordpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] + vcmpeqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] + vcmpless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] + vcmpltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] + vcmpneqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] + vcmpnless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] + vcmpnltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] + vcmpordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] + vcmpunordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] + vcmpeqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] + vcmplesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] + vcmpltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] + vcmpneqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] + vcmpnlesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] + vcmpnltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] + vcmpordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] + vcmpunordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vucomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] + vucomiss %xmm11, %xmm12 + +// CHECK: vucomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2e,0x20] + vucomiss (%rax), %xmm12 + +// CHECK: vcomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] + vcomiss %xmm11, %xmm12 + +// CHECK: vcomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2f,0x20] + vcomiss (%rax), %xmm12 + +// CHECK: vucomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] + vucomisd %xmm11, %xmm12 + +// CHECK: vucomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2e,0x20] + vucomisd (%rax), %xmm12 + +// CHECK: vcomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] + vcomisd %xmm11, %xmm12 + +// CHECK: vcomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2f,0x20] + vcomisd (%rax), %xmm12 + +// CHECK: vcvttss2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%rcx), %eax + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvttsd2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%rcx), %eax + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vmovaps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x28,0x20] + vmovaps (%rax), %xmm12 + +// CHECK: vmovaps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] + vmovaps %xmm11, %xmm12 + +// CHECK: vmovaps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x29,0x18] + vmovaps %xmm11, (%rax) + +// CHECK: vmovapd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x28,0x20] + vmovapd (%rax), %xmm12 + +// CHECK: vmovapd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3] + vmovapd %xmm11, %xmm12 + +// CHECK: vmovapd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x29,0x18] + vmovapd %xmm11, (%rax) + +// CHECK: vmovups (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x10,0x20] + vmovups (%rax), %xmm12 + +// CHECK: vmovups %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] + vmovups %xmm11, %xmm12 + +// CHECK: vmovups %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x11,0x18] + vmovups %xmm11, (%rax) + +// CHECK: vmovupd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x10,0x20] + vmovupd (%rax), %xmm12 + +// CHECK: vmovupd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] + vmovupd %xmm11, %xmm12 + +// CHECK: vmovupd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x11,0x18] + vmovupd %xmm11, (%rax) + +// CHECK: vmovlps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x13,0x18] + vmovlps %xmm11, (%rax) + +// CHECK: vmovlps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x12,0x28] + vmovlps (%rax), %xmm12, %xmm13 + +// CHECK: vmovlpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x13,0x18] + vmovlpd %xmm11, (%rax) + +// CHECK: vmovlpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x12,0x28] + vmovlpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovhps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x17,0x18] + vmovhps %xmm11, (%rax) + +// CHECK: vmovhps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x16,0x28] + vmovhps (%rax), %xmm12, %xmm13 + +// CHECK: vmovhpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x17,0x18] + vmovhpd %xmm11, (%rax) + +// CHECK: vmovhpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x16,0x28] + vmovhpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] + vmovlhps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] + vmovhlps %xmm11, %xmm12, %xmm13 + +// CHECK: vcvtss2sil %xmm11, %eax +// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] + vcvtss2si %xmm11, %eax + +// CHECK: vcvtss2sil (%rax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%rax), %ebx + +// CHECK: vcvtdq2ps %xmm10, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] + vcvtdq2ps %xmm10, %xmm12 + +// CHECK: vcvtdq2ps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x5b,0x20] + vcvtdq2ps (%rax), %xmm12 + +// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] + vcvtsd2ss %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x13,0x5a,0x10] + vcvtsd2ss (%rax), %xmm13, %xmm10 + +// CHECK: vcvtps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] + vcvtps2dq %xmm12, %xmm11 + +// CHECK: vcvtps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5b,0x18] + vcvtps2dq (%rax), %xmm11 + +// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] + vcvtss2sd %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x12,0x5a,0x10] + vcvtss2sd (%rax), %xmm13, %xmm10 + +// CHECK: vcvtdq2ps %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] + vcvtdq2ps %xmm13, %xmm10 + +// CHECK: vcvtdq2ps (%ecx), %xmm13 +// CHECK: encoding: [0xc5,0x78,0x5b,0x29] + vcvtdq2ps (%ecx), %xmm13 + +// CHECK: vcvttps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] + vcvttps2dq %xmm12, %xmm11 + +// CHECK: vcvttps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] + vcvttps2dq (%rax), %xmm11 + +// CHECK: vcvtps2pd %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] + vcvtps2pd %xmm12, %xmm11 + +// CHECK: vcvtps2pd (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x78,0x5a,0x18] + vcvtps2pd (%rax), %xmm11 + +// CHECK: vcvtpd2ps %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] + vcvtpd2ps %xmm12, %xmm11 + +// CHECK: vsqrtpd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] + vsqrtpd %xmm11, %xmm12 + +// CHECK: vsqrtpd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x51,0x20] + vsqrtpd (%rax), %xmm12 + +// CHECK: vsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] + vsqrtps %xmm11, %xmm12 + +// CHECK: vsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x51,0x20] + vsqrtps (%rax), %xmm12 + +// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] + vsqrtsd %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x51,0x10] + vsqrtsd (%rax), %xmm12, %xmm10 + +// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] + vsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x51,0x10] + vsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] + vrsqrtps %xmm11, %xmm12 + +// CHECK: vrsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x52,0x20] + vrsqrtps (%rax), %xmm12 + +// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] + vrsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x52,0x10] + vrsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrcpps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] + vrcpps %xmm11, %xmm12 + +// CHECK: vrcpps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x53,0x20] + vrcpps (%rax), %xmm12 + +// CHECK: vrcpss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] + vrcpss %xmm11, %xmm12, %xmm10 + +// CHECK: vrcpss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x53,0x10] + vrcpss (%rax), %xmm12, %xmm10 + +// CHECK: vmovntdq %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0xe7,0x18] + vmovntdq %xmm11, (%rax) + +// CHECK: vmovntpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x2b,0x18] + vmovntpd %xmm11, (%rax) + +// CHECK: vmovntps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x2b,0x18] + vmovntps %xmm11, (%rax) + +// CHECK: vldmxcsr -4(%rip) +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] + vldmxcsr -4(%rip) + +// CHECK: vstmxcsr -4(%rsp) +// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] + vstmxcsr -4(%rsp) + +// CHECK: vpsubb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] + vpsubb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf8,0x28] + vpsubb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] + vpsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf9,0x28] + vpsubw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] + vpsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfa,0x28] + vpsubd (%rax), %xmm12, %xmm13 + +// CHECK: vpsubq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] + vpsubq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfb,0x28] + vpsubq (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] + vpsubsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe8,0x28] + vpsubsb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] + vpsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe9,0x28] + vpsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] + vpsubusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd8,0x28] + vpsubusb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] + vpsubusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd9,0x28] + vpsubusw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] + vpaddb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfc,0x28] + vpaddb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb] + vpaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfd,0x28] + vpaddw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] + vpaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfe,0x28] + vpaddd (%rax), %xmm12, %xmm13 + +// CHECK: vpaddq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] + vpaddq %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd4,0x28] + vpaddq (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] + vpaddsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xec,0x28] + vpaddsb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] + vpaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xed,0x28] + vpaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] + vpaddusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdc,0x28] + vpaddusb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb] + vpaddusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdd,0x28] + vpaddusw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] + vpmulhuw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe4,0x28] + vpmulhuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] + vpmulhw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe5,0x28] + vpmulhw (%rax), %xmm12, %xmm13 + +// CHECK: vpmullw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] + vpmullw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmullw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd5,0x28] + vpmullw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] + vpmuludq %xmm11, %xmm12, %xmm13 + +// CHECK: vpmuludq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf4,0x28] + vpmuludq (%rax), %xmm12, %xmm13 + +// CHECK: vpavgb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] + vpavgb %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe0,0x28] + vpavgb (%rax), %xmm12, %xmm13 + +// CHECK: vpavgw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] + vpavgw %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe3,0x28] + vpavgw (%rax), %xmm12, %xmm13 + +// CHECK: vpminsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] + vpminsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpminsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xea,0x28] + vpminsw (%rax), %xmm12, %xmm13 + +// CHECK: vpminub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] + vpminub %xmm11, %xmm12, %xmm13 + +// CHECK: vpminub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xda,0x28] + vpminub (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] + vpmaxsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xee,0x28] + vpmaxsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] + vpmaxub %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xde,0x28] + vpmaxub (%rax), %xmm12, %xmm13 + +// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] + vpsadbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsadbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf6,0x28] + vpsadbw (%rax), %xmm12, %xmm13 + +// CHECK: vpsllw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] + vpsllw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf1,0x28] + vpsllw (%rax), %xmm12, %xmm13 + +// CHECK: vpslld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] + vpslld %xmm11, %xmm12, %xmm13 + +// CHECK: vpslld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf2,0x28] + vpslld (%rax), %xmm12, %xmm13 + +// CHECK: vpsllq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] + vpsllq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf3,0x28] + vpsllq (%rax), %xmm12, %xmm13 + +// CHECK: vpsraw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] + vpsraw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsraw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe1,0x28] + vpsraw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrad %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] + vpsrad %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrad (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe2,0x28] + vpsrad (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] + vpsrlw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd1,0x28] + vpsrlw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] + vpsrld %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd2,0x28] + vpsrld (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] + vpsrlq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd3,0x28] + vpsrlq (%rax), %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpslldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] + vpslldq $10, %xmm12, %xmm13 + +// CHECK: vpsllq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] + vpsllq $10, %xmm12, %xmm13 + +// CHECK: vpsllw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] + vpsllw $10, %xmm12, %xmm13 + +// CHECK: vpsrad $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] + vpsrad $10, %xmm12, %xmm13 + +// CHECK: vpsraw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] + vpsraw $10, %xmm12, %xmm13 + +// CHECK: vpsrld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] + vpsrld $10, %xmm12, %xmm13 + +// CHECK: vpsrldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] + vpsrldq $10, %xmm12, %xmm13 + +// CHECK: vpsrlq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] + vpsrlq $10, %xmm12, %xmm13 + +// CHECK: vpsrlw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] + vpsrlw $10, %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpand %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] + vpand %xmm11, %xmm12, %xmm13 + +// CHECK: vpand (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdb,0x28] + vpand (%rax), %xmm12, %xmm13 + +// CHECK: vpor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] + vpor %xmm11, %xmm12, %xmm13 + +// CHECK: vpor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xeb,0x28] + vpor (%rax), %xmm12, %xmm13 + +// CHECK: vpxor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] + vpxor %xmm11, %xmm12, %xmm13 + +// CHECK: vpxor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xef,0x28] + vpxor (%rax), %xmm12, %xmm13 + +// CHECK: vpandn %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] + vpandn %xmm11, %xmm12, %xmm13 + +// CHECK: vpandn (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdf,0x28] + vpandn (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] + vpcmpeqb %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x74,0x28] + vpcmpeqb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] + vpcmpeqw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x75,0x28] + vpcmpeqw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] + vpcmpeqd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x76,0x28] + vpcmpeqd (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] + vpcmpgtb %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x64,0x28] + vpcmpgtb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] + vpcmpgtw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x65,0x28] + vpcmpgtw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] + vpcmpgtd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x66,0x28] + vpcmpgtd (%rax), %xmm12, %xmm13 + +// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] + vpacksswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpacksswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x63,0x28] + vpacksswb (%rax), %xmm12, %xmm13 + +// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] + vpackssdw %xmm11, %xmm12, %xmm13 + +// CHECK: vpackssdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6b,0x28] + vpackssdw (%rax), %xmm12, %xmm13 + +// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] + vpackuswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpackuswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x67,0x28] + vpackuswb (%rax), %xmm12, %xmm13 + +// CHECK: vpshufd $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] + vpshufd $4, %xmm12, %xmm13 + +// CHECK: vpshufd $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] + vpshufd $4, (%rax), %xmm13 + +// CHECK: vpshufhw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] + vpshufhw $4, %xmm12, %xmm13 + +// CHECK: vpshufhw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] + vpshufhw $4, (%rax), %xmm13 + +// CHECK: vpshuflw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] + vpshuflw $4, %xmm12, %xmm13 + +// CHECK: vpshuflw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] + vpshuflw $4, (%rax), %xmm13 + +// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] + vpunpcklbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x60,0x28] + vpunpcklbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] + vpunpcklwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x61,0x28] + vpunpcklwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] + vpunpckldq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x62,0x28] + vpunpckldq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] + vpunpcklqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6c,0x28] + vpunpcklqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] + vpunpckhbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x68,0x28] + vpunpckhbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] + vpunpckhwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x69,0x28] + vpunpckhwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] + vpunpckhdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6a,0x28] + vpunpckhdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] + vpunpckhqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6d,0x28] + vpunpckhqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm12, %xmm13 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm13 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpmovmskb %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] + vpmovmskb %xmm12, %eax + +// CHECK: vmaskmovdqu %xmm14, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] + vmaskmovdqu %xmm14, %xmm15 + +// CHECK: vmovd %eax, %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] + vmovd %eax, %xmm14 + +// CHECK: vmovd (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0x30] + vmovd (%rax), %xmm14 + +// CHECK: vmovd %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0x7e,0x30] + vmovd %xmm14, (%rax) + +// CHECK: vmovd %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovd %rax, %xmm14 + +// CHECK: vmovq %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0xd6,0x30] + vmovq %xmm14, (%rax) + +// CHECK: vmovq %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] + vmovq %xmm14, %xmm12 + +// CHECK: vmovq (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] + vmovq (%rax), %xmm14 + +// CHECK: vmovq %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovq %rax, %xmm14 + +// CHECK: vmovq %xmm14, %rax +// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] + vmovq %xmm14, %rax + +// CHECK: vcvtpd2dq %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] + vcvtpd2dq %xmm11, %xmm12 + +// CHECK: vcvtdq2pd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] + vcvtdq2pd %xmm11, %xmm12 + +// CHECK: vcvtdq2pd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0xe6,0x20] + vcvtdq2pd (%rax), %xmm12 + +// CHECK: vmovshdup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] + vmovshdup %xmm11, %xmm12 + +// CHECK: vmovshdup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x16,0x20] + vmovshdup (%rax), %xmm12 + +// CHECK: vmovsldup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] + vmovsldup %xmm11, %xmm12 + +// CHECK: vmovsldup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x12,0x20] + vmovsldup (%rax), %xmm12 + +// CHECK: vmovddup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] + vmovddup %xmm11, %xmm12 + +// CHECK: vmovddup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7b,0x12,0x20] + vmovddup (%rax), %xmm12 + +// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] + vaddsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubps (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0xd0,0x20] + vaddsubps (%rax), %xmm11, %xmm12 + +// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] + vaddsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x21,0xd0,0x20] + vaddsubpd (%rax), %xmm11, %xmm12 + +// CHECK: vhaddps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] + vhaddps %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] + vhaddps (%rax), %xmm12, %xmm13 + +// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] + vhaddpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7c,0x28] + vhaddpd (%rax), %xmm12, %xmm13 + +// CHECK: vhsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] + vhsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] + vhsubps (%rax), %xmm12, %xmm13 + +// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] + vhsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7d,0x28] + vhsubpd (%rax), %xmm12, %xmm13 + +// CHECK: vpabsb %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] + vpabsb %xmm11, %xmm12 + +// CHECK: vpabsb (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] + vpabsb (%rax), %xmm12 + +// CHECK: vpabsw %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] + vpabsw %xmm11, %xmm12 + +// CHECK: vpabsw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20] + vpabsw (%rax), %xmm12 + +// CHECK: vpabsd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] + vpabsd %xmm11, %xmm12 + +// CHECK: vpabsd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] + vpabsd (%rax), %xmm12 + +// CHECK: vphaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] + vphaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] + vphaddw (%rax), %xmm12, %xmm13 + +// CHECK: vphaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] + vphaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] + vphaddd (%rax), %xmm12, %xmm13 + +// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] + vphaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] + vphaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] + vphsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] + vphsubw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] + vphsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] + vphsubd (%rax), %xmm12, %xmm13 + +// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] + vphsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] + vphsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] + vpmaddubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] + vpmaddubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpshufb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] + vpshufb %xmm11, %xmm12, %xmm13 + +// CHECK: vpshufb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] + vpshufb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] + vpsignb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] + vpsignb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] + vpsignw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] + vpsignw (%rax), %xmm12, %xmm13 + +// CHECK: vpsignd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] + vpsignd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] + vpsignd (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] + vpmulhrsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] + vpmulhrsw (%rax), %xmm12, %xmm13 + +// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] + vpalignr $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] + vpalignr $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] + vroundsd $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] + vroundsd $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] + vroundss $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundss $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] + vroundss $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundpd $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] + vroundpd $7, %xmm12, %xmm13 + +// CHECK: vroundpd $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] + vroundpd $7, (%rax), %xmm13 + +// CHECK: vroundps $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] + vroundps $7, %xmm12, %xmm13 + +// CHECK: vroundps $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] + vroundps $7, (%rax), %xmm13 + +// CHECK: vphminposuw %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] + vphminposuw %xmm12, %xmm13 + +// CHECK: vphminposuw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] + vphminposuw (%rax), %xmm12 + +// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] + vpackusdw %xmm12, %xmm13, %xmm11 + +// CHECK: vpackusdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] + vpackusdw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] + vpcmpeqq %xmm12, %xmm13, %xmm11 + +// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] + vpcmpeqq (%rax), %xmm12, %xmm13 + +// CHECK: vpminsb %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] + vpminsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] + vpminsb (%rax), %xmm12, %xmm13 + +// CHECK: vpminsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] + vpminsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] + vpminsd (%rax), %xmm12, %xmm13 + +// CHECK: vpminud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] + vpminud %xmm12, %xmm13, %xmm11 + +// CHECK: vpminud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] + vpminud (%rax), %xmm12, %xmm13 + +// CHECK: vpminuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] + vpminuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpminuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] + vpminuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] + vpmaxsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] + vpmaxsb (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] + vpmaxsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] + vpmaxsd (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] + vpmaxud %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] + vpmaxud (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] + vpmaxuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] + vpmaxuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuldq %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] + vpmuldq %xmm12, %xmm13, %xmm11 + +// CHECK: vpmuldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] + vpmuldq (%rax), %xmm12, %xmm13 + +// CHECK: vpmulld %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] + vpmulld %xmm12, %xmm5, %xmm11 + +// CHECK: vpmulld (%rax), %xmm5, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] + vpmulld (%rax), %xmm5, %xmm13 + +// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] + vblendps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] + vblendps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] + vblendpd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] + vblendpd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] + vpblendw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] + vpblendw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] + vmpsadbw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] + vmpsadbw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] + vdpps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] + vdpps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] + vdppd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdppd $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] + vdppd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] + vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] + vblendvpd %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] + vblendvps %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] + vblendvps %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] + vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] + vpblendvb %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpmovsxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] + vpmovsxbw %xmm12, %xmm10 + +// CHECK: vpmovsxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] + vpmovsxbw (%rax), %xmm12 + +// CHECK: vpmovsxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] + vpmovsxwd %xmm12, %xmm10 + +// CHECK: vpmovsxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] + vpmovsxwd (%rax), %xmm12 + +// CHECK: vpmovsxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] + vpmovsxdq %xmm12, %xmm10 + +// CHECK: vpmovsxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] + vpmovsxdq (%rax), %xmm12 + +// CHECK: vpmovzxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] + vpmovzxbw %xmm12, %xmm10 + +// CHECK: vpmovzxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] + vpmovzxbw (%rax), %xmm12 + +// CHECK: vpmovzxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] + vpmovzxwd %xmm12, %xmm10 + +// CHECK: vpmovzxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] + vpmovzxwd (%rax), %xmm12 + +// CHECK: vpmovzxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] + vpmovzxdq %xmm12, %xmm10 + +// CHECK: vpmovzxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] + vpmovzxdq (%rax), %xmm12 + +// CHECK: vpmovsxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] + vpmovsxbq %xmm12, %xmm10 + +// CHECK: vpmovsxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] + vpmovsxbq (%rax), %xmm12 + +// CHECK: vpmovzxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] + vpmovzxbq %xmm12, %xmm10 + +// CHECK: vpmovzxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] + vpmovzxbq (%rax), %xmm12 + +// CHECK: vpmovsxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] + vpmovsxbd %xmm12, %xmm10 + +// CHECK: vpmovsxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] + vpmovsxbd (%rax), %xmm12 + +// CHECK: vpmovsxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] + vpmovsxwq %xmm12, %xmm10 + +// CHECK: vpmovsxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] + vpmovsxwq (%rax), %xmm12 + +// CHECK: vpmovzxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] + vpmovzxbd %xmm12, %xmm10 + +// CHECK: vpmovzxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] + vpmovzxbd (%rax), %xmm12 + +// CHECK: vpmovzxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] + vpmovzxwq %xmm12, %xmm10 + +// CHECK: vpmovzxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] + vpmovzxwq (%rax), %xmm12 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpextrw $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] + vpextrw $7, %xmm12, (%rax) + +// CHECK: vpextrd $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] + vpextrd $7, %xmm12, %eax + +// CHECK: vpextrd $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] + vpextrd $7, %xmm12, (%rax) + +// CHECK: vpextrb $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] + vpextrb $7, %xmm12, %eax + +// CHECK: vpextrb $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] + vpextrb $7, %xmm12, (%rax) + +// CHECK: vpextrq $7, %xmm12, %rcx +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] + vpextrq $7, %xmm12, %rcx + +// CHECK: vpextrq $7, %xmm12, (%rcx) +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] + vpextrq $7, %xmm12, (%rcx) + +// CHECK: vextractps $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] + vextractps $7, %xmm12, (%rax) + +// CHECK: vextractps $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] + vextractps $7, %xmm12, %eax + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] + vpinsrw $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] + vpinsrb $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] + vpinsrb $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] + vpinsrd $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] + vpinsrd $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] + vpinsrq $7, %rax, %xmm12, %xmm10 + +// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] + vpinsrq $7, (%rax), %xmm12, %xmm10 + +// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] + vinsertps $7, %xmm12, %xmm10, %xmm11 + +// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] + vinsertps $7, (%rax), %xmm10, %xmm11 + +// CHECK: vptest %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] + vptest %xmm12, %xmm10 + +// CHECK: vptest (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] + vptest (%rax), %xmm12 + +// CHECK: vmovntdqa (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] + vmovntdqa (%rax), %xmm12 + +// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] + vpcmpgtq %xmm12, %xmm10, %xmm11 + +// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] + vpcmpgtq (%rax), %xmm10, %xmm13 + +// CHECK: vpcmpistrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] + vpcmpistrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpistrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] + vpcmpistrm $7, (%rax), %xmm10 + +// CHECK: vpcmpestrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] + vpcmpestrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpestrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] + vpcmpestrm $7, (%rax), %xmm10 + +// CHECK: vpcmpistri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] + vpcmpistri $7, %xmm12, %xmm10 + +// CHECK: vpcmpistri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] + vpcmpistri $7, (%rax), %xmm10 + +// CHECK: vpcmpestri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] + vpcmpestri $7, %xmm12, %xmm10 + +// CHECK: vpcmpestri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] + vpcmpestri $7, (%rax), %xmm10 + +// CHECK: vaesimc %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] + vaesimc %xmm12, %xmm10 + +// CHECK: vaesimc (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] + vaesimc (%rax), %xmm12 + +// CHECK: vaesenc %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] + vaesenc %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenc (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] + vaesenc (%rax), %xmm10, %xmm13 + +// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] + vaesenclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28] + vaesenclast (%rax), %xmm10, %xmm13 + +// CHECK: vaesdec %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] + vaesdec %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdec (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] + vaesdec (%rax), %xmm10, %xmm13 + +// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] + vaesdeclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] + vaesdeclast (%rax), %xmm10, %xmm13 + +// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] + vaeskeygenassist $7, %xmm12, %xmm10 + +// CHECK: vaeskeygenassist $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] + vaeskeygenassist $7, (%rax), %xmm10 + +// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] + vcmpeq_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] + vcmpngeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] + vcmpngtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] + vcmpfalseps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] + vcmpneq_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] + vcmpgeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] + vcmpgtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] + vcmptrueps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] + vcmpeq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] + vcmplt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] + vcmple_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] + vcmpunord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] + vcmpneq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] + vcmpnlt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] + vcmpnle_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] + vcmpord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] + vcmpeq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] + vcmpnge_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a] + vcmpngt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] + vcmpfalse_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] + vcmpneq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] + vcmpge_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e] + vcmpgt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] + vcmptrue_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovaps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x28,0x20] + vmovaps (%rax), %ymm12 + +// CHECK: vmovaps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] + vmovaps %ymm11, %ymm12 + +// CHECK: vmovaps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x29,0x18] + vmovaps %ymm11, (%rax) + +// CHECK: vmovapd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x28,0x20] + vmovapd (%rax), %ymm12 + +// CHECK: vmovapd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] + vmovapd %ymm11, %ymm12 + +// CHECK: vmovapd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x29,0x18] + vmovapd %ymm11, (%rax) + +// CHECK: vmovups (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x10,0x20] + vmovups (%rax), %ymm12 + +// CHECK: vmovups %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] + vmovups %ymm11, %ymm12 + +// CHECK: vmovups %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x11,0x18] + vmovups %ymm11, (%rax) + +// CHECK: vmovupd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x10,0x20] + vmovupd (%rax), %ymm12 + +// CHECK: vmovupd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] + vmovupd %ymm11, %ymm12 + +// CHECK: vmovupd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x11,0x18] + vmovupd %ymm11, (%rax) + +// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] + vunpckhps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] + vunpckhpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] + vunpcklps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] + vunpcklpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vmovntdq %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] + vmovntdq %ymm11, (%rax) + +// CHECK: vmovntpd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] + vmovntpd %ymm11, (%rax) + +// CHECK: vmovntps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] + vmovntps %ymm11, (%rax) + +// CHECK: vmovmskps %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] + vmovmskps %xmm12, %eax + +// CHECK: vmovmskpd %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] + vmovmskpd %xmm12, %eax + +// CHECK: vmaxps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] + vmaxps %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] + vmaxpd %ymm12, %ymm4, %ymm6 + +// CHECK: vminps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4] + vminps %ymm12, %ymm4, %ymm6 + +// CHECK: vminpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] + vminpd %ymm12, %ymm4, %ymm6 + +// CHECK: vsubps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] + vsubps %ymm12, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] + vsubpd %ymm12, %ymm4, %ymm6 + +// CHECK: vdivps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] + vdivps %ymm12, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] + vdivpd %ymm12, %ymm4, %ymm6 + +// CHECK: vaddps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] + vaddps %ymm12, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] + vaddpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmulps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] + vmulps %ymm12, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] + vmulpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%rax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%rax), %ymm4, %ymm6 + +// CHECK: vminps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%rax), %ymm4, %ymm6 + +// CHECK: vminpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%rax), %ymm4, %ymm6 + +// CHECK: vsubps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%rax), %ymm4, %ymm6 + +// CHECK: vsubpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%rax), %ymm4, %ymm6 + +// CHECK: vdivps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%rax), %ymm4, %ymm6 + +// CHECK: vdivpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] + vdivpd (%rax), %ymm4, %ymm6 + +// CHECK: vaddps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%rax), %ymm4, %ymm6 + +// CHECK: vaddpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%rax), %ymm4, %ymm6 + +// CHECK: vmulps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%rax), %ymm4, %ymm6 + +// CHECK: vmulpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%rax), %ymm4, %ymm6 + +// CHECK: vsqrtpd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] + vsqrtpd %ymm11, %ymm12 + +// CHECK: vsqrtpd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x51,0x20] + vsqrtpd (%rax), %ymm12 + +// CHECK: vsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] + vsqrtps %ymm11, %ymm12 + +// CHECK: vsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x51,0x20] + vsqrtps (%rax), %ymm12 + +// CHECK: vrsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] + vrsqrtps %ymm11, %ymm12 + +// CHECK: vrsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x52,0x20] + vrsqrtps (%rax), %ymm12 + +// CHECK: vrcpps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] + vrcpps %ymm11, %ymm12 + +// CHECK: vrcpps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x53,0x20] + vrcpps (%rax), %ymm12 + +// CHECK: vandps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] + vandps %ymm12, %ymm14, %ymm11 + +// CHECK: vandpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] + vandpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] + vorps %ymm12, %ymm14, %ymm11 + +// CHECK: vorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] + vorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] + vxorps %ymm12, %ymm14, %ymm11 + +// CHECK: vxorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] + vxorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] + vandnps %ymm12, %ymm14, %ymm11 + +// CHECK: vandnpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] + vandnpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vcvtps2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] + vcvtps2pd %xmm13, %ymm12 + +// CHECK: vcvtps2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] + vcvtps2pd (%rax), %ymm12 + +// CHECK: vcvtdq2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] + vcvtdq2pd %xmm13, %ymm12 + +// CHECK: vcvtdq2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] + vcvtdq2pd (%rax), %ymm12 + +// CHECK: vcvtdq2ps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] + vcvtdq2ps %ymm12, %ymm10 + +// CHECK: vcvtdq2ps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] + vcvtdq2ps (%rax), %ymm12 + +// CHECK: vcvtps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] + vcvtps2dq %ymm12, %ymm10 + +// CHECK: vcvtps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] + vcvtps2dq (%rax), %ymm10 + +// CHECK: vcvttps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] + vcvttps2dq %ymm12, %ymm10 + +// CHECK: vcvttps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7e,0x5b,0x10] + vcvttps2dq (%rax), %ymm10 + +// CHECK: vcvttpd2dq %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dq %xmm11, %xmm10 + +// CHECK: vcvttpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] + vcvttpd2dq %ymm12, %xmm10 + +// CHECK: vcvttpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dqx %xmm11, %xmm10 + +// CHECK: vcvttpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0xe6,0x18] + vcvttpd2dqx (%rax), %xmm11 + +// CHECK: vcvttpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] + vcvttpd2dqy %ymm12, %xmm11 + +// CHECK: vcvttpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] + vcvttpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2ps %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] + vcvtpd2ps %ymm12, %xmm10 + +// CHECK: vcvtpd2psx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] + vcvtpd2psx %xmm11, %xmm10 + +// CHECK: vcvtpd2psx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5a,0x18] + vcvtpd2psx (%rax), %xmm11 + +// CHECK: vcvtpd2psy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] + vcvtpd2psy %ymm12, %xmm11 + +// CHECK: vcvtpd2psy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] + vcvtpd2psy (%rax), %xmm11 + +// CHECK: vcvtpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] + vcvtpd2dq %ymm12, %xmm10 + +// CHECK: vcvtpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] + vcvtpd2dqy %ymm12, %xmm11 + +// CHECK: vcvtpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] + vcvtpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] + vcvtpd2dqx %xmm11, %xmm10 + +// CHECK: vcvtpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] + vcvtpd2dqx (%rax), %xmm11 + +// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] + vcmpeqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] + vcmpleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] + vcmpltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] + vcmpneqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] + vcmpnleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] + vcmpnltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] + vcmpordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] + vcmpunordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] + vcmpeqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] + vcmplepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] + vcmpltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] + vcmpneqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] + vcmpnlepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] + vcmpnltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] + vcmpordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] + vcmpunordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] + vcmpeq_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] + vcmpngeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] + vcmpngtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] + vcmpfalseps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] + vcmpneq_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] + vcmpgeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] + vcmpgtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] + vcmptrueps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] + vcmpeq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] + vcmplt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] + vcmple_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] + vcmpunord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] + vcmpneq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] + vcmpnlt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] + vcmpnle_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] + vcmpord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] + vcmpeq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] + vcmpnge_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] + vcmpngt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] + vcmpfalse_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] + vcmpneq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] + vcmpge_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] + vcmpgt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] + vcmptrue_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb] + vaddsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x27,0xd0,0x20] + vaddsubps (%rax), %ymm11, %ymm12 + +// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb] + vaddsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubpd (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x25,0xd0,0x20] + vaddsubpd (%rax), %ymm11, %ymm12 + +// CHECK: vhaddps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb] + vhaddps %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7c,0x28] + vhaddps (%rax), %ymm12, %ymm13 + +// CHECK: vhaddpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb] + vhaddpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7c,0x28] + vhaddpd (%rax), %ymm12, %ymm13 + +// CHECK: vhsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb] + vhsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7d,0x28] + vhsubps (%rax), %ymm12, %ymm13 + +// CHECK: vhsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb] + vhsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7d,0x28] + vhsubpd (%rax), %ymm12, %ymm13 + +// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03] + vblendps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03] + vblendps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03] + vblendpd $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03] + vblendpd $3, (%rax), %ymm10, %ymm11 + +// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03] + vdpps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vdpps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03] + vdpps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vbroadcastf128 (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20] + vbroadcastf128 (%rax), %ymm12 + +// CHECK: vbroadcastsd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20] + vbroadcastsd (%rax), %ymm12 + +// CHECK: vbroadcastss (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20] + vbroadcastss (%rax), %xmm12 + +// CHECK: vbroadcastss (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20] + vbroadcastss (%rax), %ymm12 + +// CHECK: vinsertf128 $7, %xmm12, %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07] + vinsertf128 $7, %xmm12, %ymm12, %ymm10 + +// CHECK: vinsertf128 $7, (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07] + vinsertf128 $7, (%rax), %ymm12, %ymm10 + +// CHECK: vextractf128 $7, %ymm12, %xmm12 +// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07] + vextractf128 $7, %ymm12, %xmm12 + +// CHECK: vextractf128 $7, %ymm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07] + vextractf128 $7, %ymm12, (%rax) + +// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20] + vmaskmovpd %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20] + vmaskmovpd %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10] + vmaskmovpd (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10] + vmaskmovpd (%rax), %ymm12, %ymm10 + +// CHECK: vmaskmovps %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20] + vmaskmovps %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovps %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20] + vmaskmovps %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovps (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10] + vmaskmovps (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovps (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10] + vmaskmovps (%rax), %ymm12, %ymm10 + +// CHECK: vpermilps $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07] + vpermilps $7, %xmm11, %xmm10 + +// CHECK: vpermilps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07] + vpermilps $7, %ymm10, %ymm11 + +// CHECK: vpermilps $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07] + vpermilps $7, (%rax), %xmm10 + +// CHECK: vpermilps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07] + vpermilps $7, (%rax), %ymm10 + +// CHECK: vpermilps %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb] + vpermilps %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilps %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb] + vpermilps %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilps (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28] + vpermilps (%rax), %xmm10, %xmm13 + +// CHECK: vpermilps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18] + vpermilps (%rax), %ymm10, %ymm11 + +// CHECK: vpermilpd $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07] + vpermilpd $7, %xmm11, %xmm10 + +// CHECK: vpermilpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07] + vpermilpd $7, %ymm10, %ymm11 + +// CHECK: vpermilpd $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07] + vpermilpd $7, (%rax), %xmm10 + +// CHECK: vpermilpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07] + vpermilpd $7, (%rax), %ymm10 + +// CHECK: vpermilpd %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb] + vpermilpd %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilpd %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb] + vpermilpd %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilpd (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28] + vpermilpd (%rax), %xmm10, %xmm13 + +// CHECK: vpermilpd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18] + vpermilpd (%rax), %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07] + vperm2f128 $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07] + vperm2f128 $7, (%rax), %ymm10, %ymm11 + +// CHECK: vcvtsd2si %xmm8, %r8d +// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0] + vcvtsd2si %xmm8, %r8d + +// CHECK: vcvtsd2si (%rcx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%rcx), %ecx + +// CHECK: vcvtss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc] + vcvtss2si %xmm4, %rcx + +// CHECK: vcvtss2si (%rcx), %r8 +// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01] + vcvtss2si (%rcx), %r8 + +// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8] + vcvtsi2sdl %r8d, %xmm8, %xmm15 + +// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15 +// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00] + vcvtsi2sdl (%rbp), %xmm8, %xmm15 + +// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1] + vcvtsi2sdq %rcx, %xmm4, %xmm6 + +// CHECK: vcvtsi2sdq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31] + vcvtsi2sdq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1] + vcvtsi2ssq %rcx, %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31] + vcvtsi2ssq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvttsd2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc] + vcvttsd2si %xmm4, %rcx + +// CHECK: vcvttsd2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09] + vcvttsd2si (%rcx), %rcx + +// CHECK: vcvttss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc] + vcvttss2si %xmm4, %rcx + +// CHECK: vcvttss2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09] + vcvttss2si (%rcx), %rcx + +// CHECK: vlddqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0xf0,0x20] + vlddqu (%rax), %ymm12 + +// CHECK: vmovddup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4] + vmovddup %ymm12, %ymm10 + +// CHECK: vmovddup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0x12,0x20] + vmovddup (%rax), %ymm12 + +// CHECK: vmovdqa %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4] + vmovdqa %ymm12, %ymm10 + +// CHECK: vmovdqa %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x7f,0x20] + vmovdqa %ymm12, (%rax) + +// CHECK: vmovdqa (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x6f,0x20] + vmovdqa (%rax), %ymm12 + +// CHECK: vmovdqu %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4] + vmovdqu %ymm12, %ymm10 + +// CHECK: vmovdqu %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7e,0x7f,0x20] + vmovdqu %ymm12, (%rax) + +// CHECK: vmovdqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x6f,0x20] + vmovdqu (%rax), %ymm12 + +// CHECK: vmovshdup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4] + vmovshdup %ymm12, %ymm10 + +// CHECK: vmovshdup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x16,0x20] + vmovshdup (%rax), %ymm12 + +// CHECK: vmovsldup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4] + vmovsldup %ymm12, %ymm10 + +// CHECK: vmovsldup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x12,0x20] + vmovsldup (%rax), %ymm12 + +// CHECK: vptest %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4] + vptest %ymm12, %ymm10 + +// CHECK: vptest (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20] + vptest (%rax), %ymm12 + +// CHECK: vroundpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07] + vroundpd $7, %ymm10, %ymm11 + +// CHECK: vroundpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07] + vroundpd $7, (%rax), %ymm10 + +// CHECK: vroundps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07] + vroundps $7, %ymm10, %ymm11 + +// CHECK: vroundps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07] + vroundps $7, (%rax), %ymm10 + +// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07] + vshufpd $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07] + vshufpd $7, (%rax), %ymm10, %ymm11 + +// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07] + vshufps $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufps $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07] + vshufps $7, (%rax), %ymm10, %ymm11 + +// CHECK: vtestpd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4] + vtestpd %xmm12, %xmm10 + +// CHECK: vtestpd %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4] + vtestpd %ymm12, %ymm10 + +// CHECK: vtestpd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20] + vtestpd (%rax), %xmm12 + +// CHECK: vtestpd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20] + vtestpd (%rax), %ymm12 + +// CHECK: vtestps %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4] + vtestps %xmm12, %xmm10 + +// CHECK: vtestps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4] + vtestps %ymm12, %ymm10 + +// CHECK: vtestps (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20] + vtestps (%rax), %xmm12 + +// CHECK: vtestps (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20] + vtestps (%rax), %ymm12 + +// CHECK: vextractps $10, %xmm8, %r8 +// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a] + vextractps $10, %xmm8, %r8 + +// CHECK: vextractps $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07] + vextractps $7, %xmm4, %rcx + +// CHECK: vmovd %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1] + vmovd %xmm4, %rcx + +// CHECK: vmovmskpd %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0x50,0xcc] + vmovmskpd %xmm4, %rcx + +// CHECK: vmovmskpd %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfd,0x50,0xcc] + vmovmskpd %ymm4, %rcx + +// CHECK: vmovmskps %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf8,0x50,0xcc] + vmovmskps %xmm4, %rcx + +// CHECK: vmovmskps %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfc,0x50,0xcc] + vmovmskps %ymm4, %rcx + +// CHECK: vpextrb $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07] + vpextrb $7, %xmm4, %rcx + +// CHECK: vpinsrw $7, %r8, %xmm15, %xmm8 +// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07] + vpinsrw $7, %r8, %xmm15, %xmm8 + +// CHECK: vpinsrw $7, %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07] + vpinsrw $7, %rcx, %xmm4, %xmm6 + +// CHECK: vpmovmskb %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc] + vpmovmskb %xmm4, %rcx + +// CHECK: vblendvpd %ymm11, 57005(%rax,%riz), %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0] + vblendvpd %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 0fce5925cba9..f45b0a23d5e8 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -4,7 +4,7 @@ // CHECK: callw 42 // CHECK: encoding: [0x66,0xe8,A,A] callw 42 - + // rdar://8127102 // CHECK: movq %gs:(%rdi), %rax // CHECK: encoding: [0x65,0x48,0x8b,0x07] @@ -114,2889 +114,29 @@ movd %mm1, %rdx // CHECK: encoding: [0x0f,0x7e,0xca] movd %mm1, %edx -// CHECK: vaddss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] -vaddss %xmm8, %xmm9, %xmm10 - -// CHECK: vmulss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] -vmulss %xmm8, %xmm9, %xmm10 - -// CHECK: vsubss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] -vsubss %xmm8, %xmm9, %xmm10 - -// CHECK: vdivss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] -vdivss %xmm8, %xmm9, %xmm10 - -// CHECK: vaddsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] -vaddsd %xmm8, %xmm9, %xmm10 - -// CHECK: vmulsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] -vmulsd %xmm8, %xmm9, %xmm10 - -// CHECK: vsubsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] -vsubsd %xmm8, %xmm9, %xmm10 - -// CHECK: vdivsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] -vdivsd %xmm8, %xmm9, %xmm10 - -// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc] -vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] -vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] -vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] -vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] -vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] -vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] -vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] -vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] -vaddps %xmm10, %xmm11, %xmm15 - -// CHECK: vsubps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] -vsubps %xmm10, %xmm11, %xmm15 - -// CHECK: vmulps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] -vmulps %xmm10, %xmm11, %xmm15 - -// CHECK: vdivps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] -vdivps %xmm10, %xmm11, %xmm15 - -// CHECK: vaddpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] -vaddpd %xmm10, %xmm11, %xmm15 - -// CHECK: vsubpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] -vsubpd %xmm10, %xmm11, %xmm15 - -// CHECK: vmulpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] -vmulpd %xmm10, %xmm11, %xmm15 - -// CHECK: vdivpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa] -vdivpd %xmm10, %xmm11, %xmm15 - -// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] -vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] -vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] -vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] -vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc] -vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] -vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] -vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] -vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmaxss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] - vmaxss %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] - vmaxsd %xmm10, %xmm14, %xmm12 - -// CHECK: vminss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] - vminss %xmm10, %xmm14, %xmm12 - -// CHECK: vminsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] - vminsd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] - vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] - vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] - vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] - vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] - vmaxps %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] - vmaxpd %xmm10, %xmm14, %xmm12 - -// CHECK: vminps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] - vminps %xmm10, %xmm14, %xmm12 - -// CHECK: vminpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] - vminpd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] - vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] - vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] - vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] - vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] - vandps %xmm10, %xmm14, %xmm12 - -// CHECK: vandpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] - vandpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2] - vorps %xmm10, %xmm14, %xmm12 - -// CHECK: vorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] - vorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] - vxorps %xmm10, %xmm14, %xmm12 - -// CHECK: vxorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] - vxorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] - vandnps %xmm10, %xmm14, %xmm12 - -// CHECK: vandnpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] - vandnpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] - vmovss -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovss %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] - vmovss %xmm14, %xmm10, %xmm15 - -// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] - vmovsd -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovsd %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe] - vmovsd %xmm14, %xmm10, %xmm15 - // rdar://7840289 // CHECK: pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A] // CHECK: fixup A - offset: 5, value: CPI1_0-4 pshufb CPI1_0(%rip), %xmm1 -// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] - vunpckhps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] - vunpckhpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] - vunpcklps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] - vunpcklpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] - vcmpps $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] - vcmpps $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] - vcmpps $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] - vcmppd $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] - vcmppd $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] - vcmppd $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] - vshufps $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] - vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] - vshufpd $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] - vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] - vcmpeqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] - vcmpleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] - vcmpltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] - vcmpneqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] - vcmpnleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] - vcmpnltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] - vcmpordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] - vcmpunordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] - vcmpeqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] - vcmplepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] - vcmpltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] - vcmpneqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] - vcmpnlepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] - vcmpnltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] - vcmpordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] - vcmpunordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] - vcmpeqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] - vcmpless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] - vcmpltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] - vcmpneqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] - vcmpnless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] - vcmpnltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] - vcmpordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] - vcmpunordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] - vcmpeqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] - vcmplesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] - vcmpltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] - vcmpneqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] - vcmpnlesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] - vcmpnltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] - vcmpordsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] - vcmpunordsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vucomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] - vucomiss %xmm11, %xmm12 - -// CHECK: vucomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2e,0x20] - vucomiss (%rax), %xmm12 - -// CHECK: vcomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] - vcomiss %xmm11, %xmm12 - -// CHECK: vcomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2f,0x20] - vcomiss (%rax), %xmm12 - -// CHECK: vucomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] - vucomisd %xmm11, %xmm12 - -// CHECK: vucomisd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x2e,0x20] - vucomisd (%rax), %xmm12 - -// CHECK: vcomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] - vcomisd %xmm11, %xmm12 - -// CHECK: vcomisd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x2f,0x20] - vcomisd (%rax), %xmm12 - -// CHECK: vcvttss2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%rcx), %eax - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvttsd2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%rcx), %eax - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vmovaps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x28,0x20] - vmovaps (%rax), %xmm12 - -// CHECK: vmovaps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] - vmovaps %xmm11, %xmm12 - -// CHECK: vmovaps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x29,0x18] - vmovaps %xmm11, (%rax) - -// CHECK: vmovapd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x28,0x20] - vmovapd (%rax), %xmm12 - -// CHECK: vmovapd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3] - vmovapd %xmm11, %xmm12 - -// CHECK: vmovapd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x29,0x18] - vmovapd %xmm11, (%rax) - -// CHECK: vmovups (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x10,0x20] - vmovups (%rax), %xmm12 - -// CHECK: vmovups %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] - vmovups %xmm11, %xmm12 - -// CHECK: vmovups %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x11,0x18] - vmovups %xmm11, (%rax) - -// CHECK: vmovupd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x10,0x20] - vmovupd (%rax), %xmm12 - -// CHECK: vmovupd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] - vmovupd %xmm11, %xmm12 - -// CHECK: vmovupd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x11,0x18] - vmovupd %xmm11, (%rax) - -// CHECK: vmovlps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x13,0x18] - vmovlps %xmm11, (%rax) - -// CHECK: vmovlps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x12,0x28] - vmovlps (%rax), %xmm12, %xmm13 - -// CHECK: vmovlpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x13,0x18] - vmovlpd %xmm11, (%rax) - -// CHECK: vmovlpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x12,0x28] - vmovlpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovhps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x17,0x18] - vmovhps %xmm11, (%rax) - -// CHECK: vmovhps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x16,0x28] - vmovhps (%rax), %xmm12, %xmm13 - -// CHECK: vmovhpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x17,0x18] - vmovhpd %xmm11, (%rax) - -// CHECK: vmovhpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x16,0x28] - vmovhpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] - vmovlhps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] - vmovhlps %xmm11, %xmm12, %xmm13 - -// CHECK: vcvtss2sil %xmm11, %eax -// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] - vcvtss2si %xmm11, %eax - -// CHECK: vcvtss2sil (%rax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%rax), %ebx - -// CHECK: vcvtdq2ps %xmm10, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] - vcvtdq2ps %xmm10, %xmm12 - -// CHECK: vcvtdq2ps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x5b,0x20] - vcvtdq2ps (%rax), %xmm12 - -// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] - vcvtsd2ss %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x13,0x5a,0x10] - vcvtsd2ss (%rax), %xmm13, %xmm10 - -// CHECK: vcvtps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] - vcvtps2dq %xmm12, %xmm11 - -// CHECK: vcvtps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5b,0x18] - vcvtps2dq (%rax), %xmm11 - -// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] - vcvtss2sd %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x12,0x5a,0x10] - vcvtss2sd (%rax), %xmm13, %xmm10 - -// CHECK: vcvtdq2ps %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] - vcvtdq2ps %xmm13, %xmm10 - -// CHECK: vcvtdq2ps (%ecx), %xmm13 -// CHECK: encoding: [0xc5,0x78,0x5b,0x29] - vcvtdq2ps (%ecx), %xmm13 - -// CHECK: vcvttps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] - vcvttps2dq %xmm12, %xmm11 - -// CHECK: vcvttps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] - vcvttps2dq (%rax), %xmm11 - -// CHECK: vcvtps2pd %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] - vcvtps2pd %xmm12, %xmm11 - -// CHECK: vcvtps2pd (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x78,0x5a,0x18] - vcvtps2pd (%rax), %xmm11 - -// CHECK: vcvtpd2ps %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] - vcvtpd2ps %xmm12, %xmm11 - -// CHECK: vsqrtpd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] - vsqrtpd %xmm11, %xmm12 - -// CHECK: vsqrtpd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x51,0x20] - vsqrtpd (%rax), %xmm12 - -// CHECK: vsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] - vsqrtps %xmm11, %xmm12 - -// CHECK: vsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x51,0x20] - vsqrtps (%rax), %xmm12 - -// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] - vsqrtsd %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x51,0x10] - vsqrtsd (%rax), %xmm12, %xmm10 - -// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] - vsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x51,0x10] - vsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] - vrsqrtps %xmm11, %xmm12 - -// CHECK: vrsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x52,0x20] - vrsqrtps (%rax), %xmm12 - -// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] - vrsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x52,0x10] - vrsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrcpps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] - vrcpps %xmm11, %xmm12 - -// CHECK: vrcpps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x53,0x20] - vrcpps (%rax), %xmm12 - -// CHECK: vrcpss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] - vrcpss %xmm11, %xmm12, %xmm10 - -// CHECK: vrcpss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x53,0x10] - vrcpss (%rax), %xmm12, %xmm10 - -// CHECK: vmovntdq %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0xe7,0x18] - vmovntdq %xmm11, (%rax) - -// CHECK: vmovntpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x2b,0x18] - vmovntpd %xmm11, (%rax) - -// CHECK: vmovntps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x2b,0x18] - vmovntps %xmm11, (%rax) - -// CHECK: vldmxcsr -4(%rip) -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] - vldmxcsr -4(%rip) - -// CHECK: vstmxcsr -4(%rsp) -// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] - vstmxcsr -4(%rsp) - -// CHECK: vpsubb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] - vpsubb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf8,0x28] - vpsubb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] - vpsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf9,0x28] - vpsubw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] - vpsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfa,0x28] - vpsubd (%rax), %xmm12, %xmm13 - -// CHECK: vpsubq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] - vpsubq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfb,0x28] - vpsubq (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] - vpsubsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe8,0x28] - vpsubsb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] - vpsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe9,0x28] - vpsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] - vpsubusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd8,0x28] - vpsubusb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] - vpsubusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd9,0x28] - vpsubusw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] - vpaddb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfc,0x28] - vpaddb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb] - vpaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfd,0x28] - vpaddw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] - vpaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfe,0x28] - vpaddd (%rax), %xmm12, %xmm13 - -// CHECK: vpaddq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] - vpaddq %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd4,0x28] - vpaddq (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] - vpaddsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xec,0x28] - vpaddsb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] - vpaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xed,0x28] - vpaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] - vpaddusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdc,0x28] - vpaddusb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb] - vpaddusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdd,0x28] - vpaddusw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] - vpmulhuw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe4,0x28] - vpmulhuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] - vpmulhw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe5,0x28] - vpmulhw (%rax), %xmm12, %xmm13 - -// CHECK: vpmullw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] - vpmullw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmullw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd5,0x28] - vpmullw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] - vpmuludq %xmm11, %xmm12, %xmm13 - -// CHECK: vpmuludq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf4,0x28] - vpmuludq (%rax), %xmm12, %xmm13 - -// CHECK: vpavgb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] - vpavgb %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe0,0x28] - vpavgb (%rax), %xmm12, %xmm13 - -// CHECK: vpavgw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] - vpavgw %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe3,0x28] - vpavgw (%rax), %xmm12, %xmm13 - -// CHECK: vpminsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] - vpminsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpminsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xea,0x28] - vpminsw (%rax), %xmm12, %xmm13 - -// CHECK: vpminub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] - vpminub %xmm11, %xmm12, %xmm13 - -// CHECK: vpminub (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xda,0x28] - vpminub (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] - vpmaxsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xee,0x28] - vpmaxsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] - vpmaxub %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxub (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xde,0x28] - vpmaxub (%rax), %xmm12, %xmm13 - -// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] - vpsadbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsadbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf6,0x28] - vpsadbw (%rax), %xmm12, %xmm13 - -// CHECK: vpsllw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] - vpsllw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf1,0x28] - vpsllw (%rax), %xmm12, %xmm13 - -// CHECK: vpslld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] - vpslld %xmm11, %xmm12, %xmm13 - -// CHECK: vpslld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf2,0x28] - vpslld (%rax), %xmm12, %xmm13 - -// CHECK: vpsllq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] - vpsllq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf3,0x28] - vpsllq (%rax), %xmm12, %xmm13 - -// CHECK: vpsraw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] - vpsraw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsraw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe1,0x28] - vpsraw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrad %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] - vpsrad %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrad (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe2,0x28] - vpsrad (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] - vpsrlw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd1,0x28] - vpsrlw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] - vpsrld %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd2,0x28] - vpsrld (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] - vpsrlq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd3,0x28] - vpsrlq (%rax), %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpslldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] - vpslldq $10, %xmm12, %xmm13 - -// CHECK: vpsllq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] - vpsllq $10, %xmm12, %xmm13 - -// CHECK: vpsllw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] - vpsllw $10, %xmm12, %xmm13 - -// CHECK: vpsrad $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] - vpsrad $10, %xmm12, %xmm13 - -// CHECK: vpsraw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] - vpsraw $10, %xmm12, %xmm13 - -// CHECK: vpsrld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] - vpsrld $10, %xmm12, %xmm13 - -// CHECK: vpsrldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] - vpsrldq $10, %xmm12, %xmm13 - -// CHECK: vpsrlq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] - vpsrlq $10, %xmm12, %xmm13 - -// CHECK: vpsrlw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] - vpsrlw $10, %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpand %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] - vpand %xmm11, %xmm12, %xmm13 - -// CHECK: vpand (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdb,0x28] - vpand (%rax), %xmm12, %xmm13 - -// CHECK: vpor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] - vpor %xmm11, %xmm12, %xmm13 - -// CHECK: vpor (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xeb,0x28] - vpor (%rax), %xmm12, %xmm13 - -// CHECK: vpxor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] - vpxor %xmm11, %xmm12, %xmm13 - -// CHECK: vpxor (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xef,0x28] - vpxor (%rax), %xmm12, %xmm13 - -// CHECK: vpandn %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] - vpandn %xmm11, %xmm12, %xmm13 - -// CHECK: vpandn (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdf,0x28] - vpandn (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] - vpcmpeqb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x74,0x28] - vpcmpeqb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] - vpcmpeqw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x75,0x28] - vpcmpeqw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] - vpcmpeqd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x76,0x28] - vpcmpeqd (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] - vpcmpgtb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x64,0x28] - vpcmpgtb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] - vpcmpgtw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x65,0x28] - vpcmpgtw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] - vpcmpgtd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x66,0x28] - vpcmpgtd (%rax), %xmm12, %xmm13 - -// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] - vpacksswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpacksswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x63,0x28] - vpacksswb (%rax), %xmm12, %xmm13 - -// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] - vpackssdw %xmm11, %xmm12, %xmm13 - -// CHECK: vpackssdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6b,0x28] - vpackssdw (%rax), %xmm12, %xmm13 - -// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] - vpackuswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpackuswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x67,0x28] - vpackuswb (%rax), %xmm12, %xmm13 - -// CHECK: vpshufd $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] - vpshufd $4, %xmm12, %xmm13 - -// CHECK: vpshufd $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] - vpshufd $4, (%rax), %xmm13 - -// CHECK: vpshufhw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] - vpshufhw $4, %xmm12, %xmm13 - -// CHECK: vpshufhw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] - vpshufhw $4, (%rax), %xmm13 - -// CHECK: vpshuflw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] - vpshuflw $4, %xmm12, %xmm13 - -// CHECK: vpshuflw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] - vpshuflw $4, (%rax), %xmm13 - -// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] - vpunpcklbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x60,0x28] - vpunpcklbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] - vpunpcklwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x61,0x28] - vpunpcklwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] - vpunpckldq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x62,0x28] - vpunpckldq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] - vpunpcklqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6c,0x28] - vpunpcklqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] - vpunpckhbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x68,0x28] - vpunpckhbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] - vpunpckhwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x69,0x28] - vpunpckhwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] - vpunpckhdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6a,0x28] - vpunpckhdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] - vpunpckhqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6d,0x28] - vpunpckhqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm12, %xmm13 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm13 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, %eax - -// CHECK: vpmovmskb %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] - vpmovmskb %xmm12, %eax - -// CHECK: vmaskmovdqu %xmm14, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] - vmaskmovdqu %xmm14, %xmm15 - -// CHECK: vmovd %eax, %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] - vmovd %eax, %xmm14 - -// CHECK: vmovd (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0x30] - vmovd (%rax), %xmm14 - -// CHECK: vmovd %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0x7e,0x30] - vmovd %xmm14, (%rax) - -// CHECK: vmovd %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovd %rax, %xmm14 - -// CHECK: vmovq %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0xd6,0x30] - vmovq %xmm14, (%rax) - -// CHECK: vmovq %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] - vmovq %xmm14, %xmm12 - -// CHECK: vmovq (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] - vmovq (%rax), %xmm14 - -// CHECK: vmovq %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovq %rax, %xmm14 - -// CHECK: vmovq %xmm14, %rax -// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] - vmovq %xmm14, %rax - -// CHECK: vcvtpd2dq %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] - vcvtpd2dq %xmm11, %xmm12 - -// CHECK: vcvtdq2pd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] - vcvtdq2pd %xmm11, %xmm12 - -// CHECK: vcvtdq2pd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0xe6,0x20] - vcvtdq2pd (%rax), %xmm12 - -// CHECK: vmovshdup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] - vmovshdup %xmm11, %xmm12 - -// CHECK: vmovshdup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x16,0x20] - vmovshdup (%rax), %xmm12 - -// CHECK: vmovsldup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] - vmovsldup %xmm11, %xmm12 - -// CHECK: vmovsldup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x12,0x20] - vmovsldup (%rax), %xmm12 - -// CHECK: vmovddup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] - vmovddup %xmm11, %xmm12 - -// CHECK: vmovddup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7b,0x12,0x20] - vmovddup (%rax), %xmm12 - -// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] - vaddsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubps (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0xd0,0x20] - vaddsubps (%rax), %xmm11, %xmm12 - -// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] - vaddsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x21,0xd0,0x20] - vaddsubpd (%rax), %xmm11, %xmm12 - -// CHECK: vhaddps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] - vhaddps %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] - vhaddps (%rax), %xmm12, %xmm13 - -// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] - vhaddpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7c,0x28] - vhaddpd (%rax), %xmm12, %xmm13 - -// CHECK: vhsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] - vhsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] - vhsubps (%rax), %xmm12, %xmm13 - -// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] - vhsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7d,0x28] - vhsubpd (%rax), %xmm12, %xmm13 - -// CHECK: vpabsb %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] - vpabsb %xmm11, %xmm12 - -// CHECK: vpabsb (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] - vpabsb (%rax), %xmm12 - -// CHECK: vpabsw %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] - vpabsw %xmm11, %xmm12 - -// CHECK: vpabsw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20] - vpabsw (%rax), %xmm12 - -// CHECK: vpabsd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] - vpabsd %xmm11, %xmm12 - -// CHECK: vpabsd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] - vpabsd (%rax), %xmm12 - -// CHECK: vphaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] - vphaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] - vphaddw (%rax), %xmm12, %xmm13 - -// CHECK: vphaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] - vphaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] - vphaddd (%rax), %xmm12, %xmm13 - -// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] - vphaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] - vphaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] - vphsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] - vphsubw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] - vphsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] - vphsubd (%rax), %xmm12, %xmm13 - -// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] - vphsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] - vphsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] - vpmaddubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] - vpmaddubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpshufb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] - vpshufb %xmm11, %xmm12, %xmm13 - -// CHECK: vpshufb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] - vpshufb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] - vpsignb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] - vpsignb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] - vpsignw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] - vpsignw (%rax), %xmm12, %xmm13 - -// CHECK: vpsignd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] - vpsignd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] - vpsignd (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] - vpmulhrsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] - vpmulhrsw (%rax), %xmm12, %xmm13 - -// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] - vpalignr $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] - vpalignr $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] - vroundsd $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] - vroundsd $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] - vroundss $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vroundss $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] - vroundss $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundpd $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] - vroundpd $7, %xmm12, %xmm13 - -// CHECK: vroundpd $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] - vroundpd $7, (%rax), %xmm13 - -// CHECK: vroundps $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] - vroundps $7, %xmm12, %xmm13 - -// CHECK: vroundps $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] - vroundps $7, (%rax), %xmm13 - -// CHECK: vphminposuw %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] - vphminposuw %xmm12, %xmm13 - -// CHECK: vphminposuw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] - vphminposuw (%rax), %xmm12 - -// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] - vpackusdw %xmm12, %xmm13, %xmm11 - -// CHECK: vpackusdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] - vpackusdw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] - vpcmpeqq %xmm12, %xmm13, %xmm11 - -// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] - vpcmpeqq (%rax), %xmm12, %xmm13 - -// CHECK: vpminsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] - vpminsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] - vpminsb (%rax), %xmm12, %xmm13 - -// CHECK: vpminsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] - vpminsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] - vpminsd (%rax), %xmm12, %xmm13 - -// CHECK: vpminud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] - vpminud %xmm12, %xmm13, %xmm11 - -// CHECK: vpminud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] - vpminud (%rax), %xmm12, %xmm13 - -// CHECK: vpminuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] - vpminuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpminuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] - vpminuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] - vpmaxsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] - vpmaxsb (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] - vpmaxsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] - vpmaxsd (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] - vpmaxud %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] - vpmaxud (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] - vpmaxuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] - vpmaxuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuldq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] - vpmuldq %xmm12, %xmm13, %xmm11 - -// CHECK: vpmuldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] - vpmuldq (%rax), %xmm12, %xmm13 - -// CHECK: vpmulld %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] - vpmulld %xmm12, %xmm5, %xmm11 - -// CHECK: vpmulld (%rax), %xmm5, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] - vpmulld (%rax), %xmm5, %xmm13 - -// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] - vblendps $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] - vblendps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] - vblendpd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] - vblendpd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] - vpblendw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] - vpblendw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] - vmpsadbw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] - vmpsadbw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] - vdpps $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] - vdpps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] - vdppd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdppd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] - vdppd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] - vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] - vblendvpd %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] - vblendvps %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] - vblendvps %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] - vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] - vpblendvb %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpmovsxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] - vpmovsxbw %xmm12, %xmm10 - -// CHECK: vpmovsxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] - vpmovsxbw (%rax), %xmm12 - -// CHECK: vpmovsxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] - vpmovsxwd %xmm12, %xmm10 - -// CHECK: vpmovsxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] - vpmovsxwd (%rax), %xmm12 - -// CHECK: vpmovsxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] - vpmovsxdq %xmm12, %xmm10 - -// CHECK: vpmovsxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] - vpmovsxdq (%rax), %xmm12 - -// CHECK: vpmovzxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] - vpmovzxbw %xmm12, %xmm10 - -// CHECK: vpmovzxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] - vpmovzxbw (%rax), %xmm12 - -// CHECK: vpmovzxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] - vpmovzxwd %xmm12, %xmm10 - -// CHECK: vpmovzxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] - vpmovzxwd (%rax), %xmm12 - -// CHECK: vpmovzxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] - vpmovzxdq %xmm12, %xmm10 - -// CHECK: vpmovzxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] - vpmovzxdq (%rax), %xmm12 - -// CHECK: vpmovsxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] - vpmovsxbq %xmm12, %xmm10 - -// CHECK: vpmovsxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] - vpmovsxbq (%rax), %xmm12 - -// CHECK: vpmovzxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] - vpmovzxbq %xmm12, %xmm10 - -// CHECK: vpmovzxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] - vpmovzxbq (%rax), %xmm12 - -// CHECK: vpmovsxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] - vpmovsxbd %xmm12, %xmm10 - -// CHECK: vpmovsxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] - vpmovsxbd (%rax), %xmm12 - -// CHECK: vpmovsxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] - vpmovsxwq %xmm12, %xmm10 - -// CHECK: vpmovsxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] - vpmovsxwq (%rax), %xmm12 - -// CHECK: vpmovzxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] - vpmovzxbd %xmm12, %xmm10 - -// CHECK: vpmovzxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] - vpmovzxbd (%rax), %xmm12 - -// CHECK: vpmovzxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] - vpmovzxwq %xmm12, %xmm10 - -// CHECK: vpmovzxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] - vpmovzxwq (%rax), %xmm12 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, %eax - -// CHECK: vpextrw $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] - vpextrw $7, %xmm12, (%rax) - -// CHECK: vpextrd $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] - vpextrd $7, %xmm12, %eax - -// CHECK: vpextrd $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] - vpextrd $7, %xmm12, (%rax) - -// CHECK: vpextrb $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] - vpextrb $7, %xmm12, %eax - -// CHECK: vpextrb $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] - vpextrb $7, %xmm12, (%rax) - -// CHECK: vpextrq $7, %xmm12, %rcx -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] - vpextrq $7, %xmm12, %rcx - -// CHECK: vpextrq $7, %xmm12, (%rcx) -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] - vpextrq $7, %xmm12, (%rcx) - -// CHECK: vextractps $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] - vextractps $7, %xmm12, (%rax) - -// CHECK: vextractps $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] - vextractps $7, %xmm12, %eax - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] - vpinsrw $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] - vpinsrb $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] - vpinsrb $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] - vpinsrd $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] - vpinsrd $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] - vpinsrq $7, %rax, %xmm12, %xmm10 - -// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] - vpinsrq $7, (%rax), %xmm12, %xmm10 - -// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] - vinsertps $7, %xmm12, %xmm10, %xmm11 - -// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] - vinsertps $7, (%rax), %xmm10, %xmm11 - -// CHECK: vptest %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] - vptest %xmm12, %xmm10 - -// CHECK: vptest (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] - vptest (%rax), %xmm12 - -// CHECK: vmovntdqa (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] - vmovntdqa (%rax), %xmm12 - -// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] - vpcmpgtq %xmm12, %xmm10, %xmm11 - -// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] - vpcmpgtq (%rax), %xmm10, %xmm13 - -// CHECK: vpcmpistrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] - vpcmpistrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpistrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] - vpcmpistrm $7, (%rax), %xmm10 - -// CHECK: vpcmpestrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] - vpcmpestrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpestrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] - vpcmpestrm $7, (%rax), %xmm10 - -// CHECK: vpcmpistri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] - vpcmpistri $7, %xmm12, %xmm10 - -// CHECK: vpcmpistri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] - vpcmpistri $7, (%rax), %xmm10 - -// CHECK: vpcmpestri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] - vpcmpestri $7, %xmm12, %xmm10 - -// CHECK: vpcmpestri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] - vpcmpestri $7, (%rax), %xmm10 - -// CHECK: vaesimc %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] - vaesimc %xmm12, %xmm10 - -// CHECK: vaesimc (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] - vaesimc (%rax), %xmm12 - -// CHECK: vaesenc %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] - vaesenc %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenc (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] - vaesenc (%rax), %xmm10, %xmm13 - -// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] - vaesenclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenclast (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28] - vaesenclast (%rax), %xmm10, %xmm13 - -// CHECK: vaesdec %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] - vaesdec %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdec (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] - vaesdec (%rax), %xmm10, %xmm13 - -// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] - vaesdeclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] - vaesdeclast (%rax), %xmm10, %xmm13 - -// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] - vaeskeygenassist $7, %xmm12, %xmm10 - -// CHECK: vaeskeygenassist $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] - vaeskeygenassist $7, (%rax), %xmm10 - -// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] - vcmpeq_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] - vcmpngeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] - vcmpngtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] - vcmpfalseps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] - vcmpneq_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] - vcmpgeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] - vcmpgtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] - vcmptrueps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] - vcmpeq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] - vcmplt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] - vcmple_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] - vcmpunord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] - vcmpneq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] - vcmpnlt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] - vcmpnle_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] - vcmpord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] - vcmpeq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] - vcmpnge_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a] - vcmpngt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] - vcmpfalse_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] - vcmpneq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] - vcmpge_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e] - vcmpgt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] - vcmptrue_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovaps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x28,0x20] - vmovaps (%rax), %ymm12 - -// CHECK: vmovaps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] - vmovaps %ymm11, %ymm12 - -// CHECK: vmovaps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x29,0x18] - vmovaps %ymm11, (%rax) - -// CHECK: vmovapd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x28,0x20] - vmovapd (%rax), %ymm12 - -// CHECK: vmovapd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] - vmovapd %ymm11, %ymm12 - -// CHECK: vmovapd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x29,0x18] - vmovapd %ymm11, (%rax) - -// CHECK: vmovups (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x10,0x20] - vmovups (%rax), %ymm12 - -// CHECK: vmovups %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] - vmovups %ymm11, %ymm12 - -// CHECK: vmovups %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x11,0x18] - vmovups %ymm11, (%rax) - -// CHECK: vmovupd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x10,0x20] - vmovupd (%rax), %ymm12 - -// CHECK: vmovupd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] - vmovupd %ymm11, %ymm12 - -// CHECK: vmovupd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x11,0x18] - vmovupd %ymm11, (%rax) - -// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] - vunpckhps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] - vunpckhpd %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] - vunpcklps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] - vunpcklpd %ymm11, %ymm12, %ymm4 - -// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vmovntdq %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] - vmovntdq %ymm11, (%rax) - -// CHECK: vmovntpd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] - vmovntpd %ymm11, (%rax) - -// CHECK: vmovntps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] - vmovntps %ymm11, (%rax) - -// CHECK: vmovmskps %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] - vmovmskps %xmm12, %eax - -// CHECK: vmovmskpd %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] - vmovmskpd %xmm12, %eax - -// CHECK: vmaxps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] - vmaxps %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] - vmaxpd %ymm12, %ymm4, %ymm6 - -// CHECK: vminps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4] - vminps %ymm12, %ymm4, %ymm6 - -// CHECK: vminpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] - vminpd %ymm12, %ymm4, %ymm6 - -// CHECK: vsubps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] - vsubps %ymm12, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] - vsubpd %ymm12, %ymm4, %ymm6 - -// CHECK: vdivps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] - vdivps %ymm12, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] - vdivpd %ymm12, %ymm4, %ymm6 - -// CHECK: vaddps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] - vaddps %ymm12, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] - vaddpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmulps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] - vmulps %ymm12, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] - vmulpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%rax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%rax), %ymm4, %ymm6 - -// CHECK: vminps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%rax), %ymm4, %ymm6 - -// CHECK: vminpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%rax), %ymm4, %ymm6 - -// CHECK: vsubps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%rax), %ymm4, %ymm6 - -// CHECK: vsubpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%rax), %ymm4, %ymm6 - -// CHECK: vdivps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%rax), %ymm4, %ymm6 - -// CHECK: vdivpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%rax), %ymm4, %ymm6 - -// CHECK: vaddps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%rax), %ymm4, %ymm6 - -// CHECK: vaddpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%rax), %ymm4, %ymm6 - -// CHECK: vmulps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%rax), %ymm4, %ymm6 - -// CHECK: vmulpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%rax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] - vsqrtpd %ymm11, %ymm12 - -// CHECK: vsqrtpd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x51,0x20] - vsqrtpd (%rax), %ymm12 - -// CHECK: vsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] - vsqrtps %ymm11, %ymm12 - -// CHECK: vsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x51,0x20] - vsqrtps (%rax), %ymm12 - -// CHECK: vrsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] - vrsqrtps %ymm11, %ymm12 - -// CHECK: vrsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x52,0x20] - vrsqrtps (%rax), %ymm12 - -// CHECK: vrcpps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] - vrcpps %ymm11, %ymm12 - -// CHECK: vrcpps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x53,0x20] - vrcpps (%rax), %ymm12 - -// CHECK: vandps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] - vandps %ymm12, %ymm14, %ymm11 - -// CHECK: vandpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] - vandpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] - vorps %ymm12, %ymm14, %ymm11 - -// CHECK: vorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] - vorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] - vxorps %ymm12, %ymm14, %ymm11 - -// CHECK: vxorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] - vxorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] - vandnps %ymm12, %ymm14, %ymm11 - -// CHECK: vandnpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] - vandnpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vcvtps2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] - vcvtps2pd %xmm13, %ymm12 - -// CHECK: vcvtps2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] - vcvtps2pd (%rax), %ymm12 - -// CHECK: vcvtdq2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] - vcvtdq2pd %xmm13, %ymm12 - -// CHECK: vcvtdq2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] - vcvtdq2pd (%rax), %ymm12 - -// CHECK: vcvtdq2ps %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] - vcvtdq2ps %ymm12, %ymm10 - -// CHECK: vcvtdq2ps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] - vcvtdq2ps (%rax), %ymm12 - -// CHECK: vcvtps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] - vcvtps2dq %ymm12, %ymm10 - -// CHECK: vcvtps2dq (%rax), %ymm10 -// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] - vcvtps2dq (%rax), %ymm10 - -// CHECK: vcvttps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] - vcvttps2dq %ymm12, %ymm10 - -// CHECK: vcvttps2dq (%rax), %ymm10 -// CHECK: encoding: [0xc5,0x7e,0x5b,0x10] - vcvttps2dq (%rax), %ymm10 - -// CHECK: vcvttpd2dq %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dq %xmm11, %xmm10 - -// CHECK: vcvttpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] - vcvttpd2dq %ymm12, %xmm10 - -// CHECK: vcvttpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dqx %xmm11, %xmm10 - -// CHECK: vcvttpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0xe6,0x18] - vcvttpd2dqx (%rax), %xmm11 - -// CHECK: vcvttpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] - vcvttpd2dqy %ymm12, %xmm11 - -// CHECK: vcvttpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] - vcvttpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2ps %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] - vcvtpd2ps %ymm12, %xmm10 - -// CHECK: vcvtpd2psx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] - vcvtpd2psx %xmm11, %xmm10 - -// CHECK: vcvtpd2psx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5a,0x18] - vcvtpd2psx (%rax), %xmm11 - -// CHECK: vcvtpd2psy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] - vcvtpd2psy %ymm12, %xmm11 - -// CHECK: vcvtpd2psy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] - vcvtpd2psy (%rax), %xmm11 - -// CHECK: vcvtpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] - vcvtpd2dq %ymm12, %xmm10 - -// CHECK: vcvtpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] - vcvtpd2dqy %ymm12, %xmm11 - -// CHECK: vcvtpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] - vcvtpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] - vcvtpd2dqx %xmm11, %xmm10 - -// CHECK: vcvtpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] - vcvtpd2dqx (%rax), %xmm11 - -// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] - vcmpeqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] - vcmpleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] - vcmpltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] - vcmpneqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] - vcmpnleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] - vcmpnltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] - vcmpordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] - vcmpunordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] - vcmpeqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] - vcmplepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] - vcmpltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] - vcmpneqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] - vcmpnlepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] - vcmpnltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] - vcmpordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] - vcmpunordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] - vcmpeq_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] - vcmpngeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] - vcmpngtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] - vcmpfalseps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] - vcmpneq_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] - vcmpgeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] - vcmpgtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] - vcmptrueps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] - vcmpeq_osps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] - vcmplt_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] - vcmple_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] - vcmpunord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] - vcmpneq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] - vcmpnlt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] - vcmpnle_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] - vcmpord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] - vcmpeq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] - vcmpnge_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] - vcmpngt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] - vcmpfalse_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 57005(,%riz), %rbx +// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movq 57005(,%riz), %rbx -// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] - vcmpneq_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 48879(,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movq 48879(,%riz), %rax -// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] - vcmpge_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq -4(,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movq -4(,%riz,8), %rax -// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] - vcmpgt_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x21] + movq (%rcx,%riz), %rax -// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] - vcmptrue_usps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe1] + movq (%rcx,%riz,8), %rax diff --git a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s new file mode 100644 index 000000000000..d08a7329a09f --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] + vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc] + vfmaddsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18] + vfmaddsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc] + vfmaddsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18] + vfmaddsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc] + vfmaddsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18] + vfmaddsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc] + vfmaddsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18] + vfmaddsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc] + vfmaddsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18] + vfmaddsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc] + vfmaddsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18] + vfmaddsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc] + vfmsubadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18] + vfmsubadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc] + vfmsubadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18] + vfmsubadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc] + vfmsubadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18] + vfmsubadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc] + vfmsubadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18] + vfmsubadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc] + vfmsubadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18] + vfmsubadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc] + vfmsubadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18] + vfmsubadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc] + vfmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18] + vfmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc] + vfmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18] + vfmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc] + vfmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18] + vfmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc] + vfmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18] + vfmsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc] + vfmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18] + vfmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc] + vfmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18] + vfmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc] + vfnmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18] + vfnmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc] + vfnmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18] + vfnmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc] + vfnmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18] + vfnmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc] + vfnmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18] + vfnmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc] + vfnmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18] + vfnmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc] + vfnmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18] + vfnmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc] + vfnmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18] + vfnmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc] + vfnmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18] + vfnmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc] + vfnmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18] + vfnmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc] + vfnmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18] + vfnmsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc] + vfnmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18] + vfnmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc] + vfnmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18] + vfnmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] + vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc] + vfmaddsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18] + vfmaddsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc] + vfmaddsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18] + vfmaddsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa6,0xdc] + vfmaddsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18] + vfmaddsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc] + vfmaddsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18] + vfmaddsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc] + vfmaddsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18] + vfmaddsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc] + vfmaddsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18] + vfmaddsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc] + vfmsubadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18] + vfmsubadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc] + vfmsubadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18] + vfmsubadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc] + vfmsubadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18] + vfmsubadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc] + vfmsubadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18] + vfmsubadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc] + vfmsubadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18] + vfmsubadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc] + vfmsubadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18] + vfmsubadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc] + vfmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18] + vfmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc] + vfmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18] + vfmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc] + vfmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18] + vfmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc] + vfmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18] + vfmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc] + vfmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xba,0x18] + vfmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc] + vfmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18] + vfmsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc] + vfnmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18] + vfnmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc] + vfnmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18] + vfnmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc] + vfnmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18] + vfnmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc] + vfnmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18] + vfnmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc] + vfnmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18] + vfnmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc] + vfnmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18] + vfnmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc] + vfnmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18] + vfnmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc] + vfnmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18] + vfnmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc] + vfnmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18] + vfnmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc] + vfnmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18] + vfnmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc] + vfnmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18] + vfnmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc] + vfnmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18] + vfnmsub231ps (%rax), %ymm10, %ymm11 + diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s index 1858441870ac..9f94d8404f42 100644 --- a/test/MC/AsmParser/X86/x86_64-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s @@ -72,9 +72,9 @@ stosl // Not moffset forms of moves, they are x86-32 only! rdar://7947184 -movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] -movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] -movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] +movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00] // CHECK: pushfq # encoding: [0x9c] pushf @@ -150,3 +150,10 @@ btq $0x01,%rdx // CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00] movl %gs:124, %eax +// CHECK: jmpq *8(%rax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%rax) + +// CHECK: btq $61, -216(%rbp) +// CHECK: encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d] + btq $61, -216(%rbp) diff --git a/test/MC/AsmParser/X86/x86_instruction_errors.s b/test/MC/AsmParser/X86/x86_instruction_errors.s new file mode 100644 index 000000000000..183306be2c11 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_instruction_errors.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err +// RUN: FileCheck < %t.err %s + +// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq') +cmp $0, 0(%eax) diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s index 4bc8a4bb3a84..a82d2a1c0d41 100644 --- a/test/MC/AsmParser/X86/x86_instructions.s +++ b/test/MC/AsmParser/X86/x86_instructions.s @@ -1,4 +1,6 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err +// RUN: FileCheck < %t %s +// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s // CHECK: subb %al, %al subb %al, %al @@ -56,7 +58,7 @@ subl %eax, %ebx // FIXME: Check that this matches the correct instruction. -// CHECK: call *%rax +// CHECK: callq *%rax call *%rax // FIXME: Check that this matches the correct instruction. @@ -151,3 +153,23 @@ fadd %st(7) // CHECK: int3 INT3 + +// Allow scale factor without index register. +// CHECK: movaps %xmm3, (%esi) +// CHECK-STDERR: warning: scale factor without index register is ignored +movaps %xmm3, (%esi, 2) + +// CHECK: imull $12, %eax, %eax +imul $12, %eax + +// CHECK: imull %ecx, %eax +imull %ecx, %eax + +// PR8114 +// CHECK: outb %al, %dx +// CHECK: outw %ax, %dx +// CHECK: outl %eax, %dx + +out %al, (%dx) +out %ax, (%dx) +outl %eax, (%dx) diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s index bf958d8478ca..ddadf7931895 100644 --- a/test/MC/AsmParser/X86/x86_operands.s +++ b/test/MC/AsmParser/X86/x86_operands.s @@ -1,5 +1,3 @@ -// FIXME: Actually test that we get the expected results. - // RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s # Immediates diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp index 64cb75b20ff1..a6d81da5b716 100644 --- a/test/MC/AsmParser/dg.exp +++ b/test/MC/AsmParser/dg.exp @@ -1,4 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] - +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s index 3eb8e96f2f88..1fd1f6e44a32 100644 --- a/test/MC/AsmParser/directive_abort.s +++ b/test/MC/AsmParser/directive_abort.s @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t # RUN: FileCheck -input-file %t %s -# CHECK: .abort "please stop assembing" -TEST0: - .abort "please stop assembing" +# CHECK: error: .abort 'please stop assembing' +TEST0: + .abort please stop assembing diff --git a/test/MC/AsmParser/directive_elf_size.s b/test/MC/AsmParser/directive_elf_size.s new file mode 100644 index 000000000000..af35ae07ed6c --- /dev/null +++ b/test/MC/AsmParser/directive_elf_size.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +a: + ret +.Lt: +# CHECK: .size a, .Lt-a + .size a, .Lt-a + diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s index beac69a4aeb1..c7617a337e02 100644 --- a/test/MC/AsmParser/directive_values.s +++ b/test/MC/AsmParser/directive_values.s @@ -19,3 +19,20 @@ TEST2: # CHECK: .quad 9 TEST3: .quad 9 + + +# rdar://7997827 +TEST4: + .quad 0b0100 + .quad 4294967295 + .quad 4294967295+1 + .quad 4294967295LL+1 + .quad 0b10LL + 07ULL + 0x42AULL +# CHECK: TEST4 +# CHECK: .quad 4 +# CHECK: .quad 4294967295 +# CHECK: .quad 4294967296 +# CHECK: .quad 4294967296 +# CHECK: .quad 1075 + + diff --git a/test/MC/AsmParser/dollars-in-identifiers.s b/test/MC/AsmParser/dollars-in-identifiers.s new file mode 100644 index 000000000000..e56959062ad9 --- /dev/null +++ b/test/MC/AsmParser/dollars-in-identifiers.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc -triple i386-unknown-unknown %s > %t +# RUN: FileCheck < %t %s + +// CHECK: .globl $foo +.globl $foo +// CHECK: .long ($foo) +.long ($foo) diff --git a/test/MC/AsmParser/macro-def-in-instantiation.s b/test/MC/AsmParser/macro-def-in-instantiation.s new file mode 100644 index 000000000000..b6483b3b32b2 --- /dev/null +++ b/test/MC/AsmParser/macro-def-in-instantiation.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s + +.macro .make_macro +$0 $1 +$2 $3 +$4 +.endmacro + +.make_macro .macro,.mybyte,.byte,$0,.endmacro + +.data +// CHECK: .byte 10 +.mybyte 10 diff --git a/test/MC/AsmParser/macros-parsing.s b/test/MC/AsmParser/macros-parsing.s new file mode 100644 index 000000000000..65f64546cc13 --- /dev/null +++ b/test/MC/AsmParser/macros-parsing.s @@ -0,0 +1,23 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.endmacro + +.macros_off +// CHECK-ERRORS: 9:1: warning: ignoring directive for now +.test0 +.macros_on + +.test0 + +// CHECK-ERRORS: macro '.test0' is already defined +.macro .test0 +.endmacro + +// CHECK-ERRORS: unexpected '.endmacro' in file +.endmacro + +// CHECK-ERRORS: no matching '.endmacro' in definition +.macro dummy + diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s new file mode 100644 index 000000000000..214274d9aa84 --- /dev/null +++ b/test/MC/AsmParser/macros.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.macrobody0 +.endmacro +.macro .test1 +.test0 +.endmacro + +.test1 +// CHECK-ERRORS: <instantiation>:1:1: warning: ignoring directive for now +// CHECK-ERRORS-NEXT: macrobody0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: <instantiation>:1:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: 11:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test1 +// CHECK-ERRORS-NEXT: ^ + +.macro test2 +.byte $0 +.endmacro +test2 10 + +.macro test3 +.globl "$0 $1 $2 $$3 $n" +.endmacro + +// CHECK: .globl "1 23 $3 2" +test3 1,2 3 + +.macro test4 +.globl "$0 -- $1" +.endmacro + +// CHECK: .globl "ab)(,) -- (cd)" +test4 a b)(,),(cd) diff --git a/test/MC/COFF/basic-coff.ll b/test/MC/COFF/basic-coff.ll new file mode 100644 index 000000000000..1e67db0e5c42 --- /dev/null +++ b/test/MC/COFF/basic-coff.ll @@ -0,0 +1,136 @@ +; This test checks that the COFF object emitter works for the most basic +; programs. + +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: coff-dump.py %abs_tmp | FileCheck %s +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +@.str = private constant [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +; CHECK: { +; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C) +; CHECK: NumberOfSections = 2 +; CHECK: TimeDateStamp = {{[0-9]+}} +; CHECK: PointerToSymbolTable = 0x99 +; CHECK: NumberOfSymbols = 7 +; CHECK: SizeOfOptionalHeader = 0 +; CHECK: Characteristics = 0x0 +; CHECK: Sections = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 21 +; CHECK: PointerToRawData = 0x64 +; CHECK: PointerToRelocations = 0x79 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: NumberOfRelocations = 2 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0x60500020 +; CHECK: IMAGE_SCN_CNT_CODE +; CHECK: IMAGE_SCN_ALIGN_16BYTES +; CHECK: IMAGE_SCN_MEM_EXECUTE +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: SectionData = +; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 31 |.....$.........1| +; CHECK: C0 83 C4 04 C3 |.....| +; CHECK: Relocations = [ +; CHECK: 0 = { +; CHECK: VirtualAddress = 0x6 +; CHECK: SymbolTableIndex = 5 +; CHECK: Type = IMAGE_REL_I386_DIR32 (6) +; CHECK: SymbolName = _main +; CHECK: } +; CHECK: 1 = { +; CHECK: VirtualAddress = 0xB +; CHECK: SymbolTableIndex = 6 +; CHECK: Type = IMAGE_REL_I386_REL32 (20) +; CHECK: SymbolName = L_.str +; CHECK: } +; CHECK: ] +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 12 +; CHECK: PointerToRawData = 0x8D +; CHECK: PointerToRelocations = 0x0 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: NumberOfRelocations = 0 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0xC0100040 +; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA +; CHECK: IMAGE_SCN_ALIGN_1BYTES +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: IMAGE_SCN_MEM_WRITE +; CHECK: SectionData = +; CHECK: 48 65 6C 6C 6F 20 57 6F - 72 6C 64 00 |Hello World.| +; CHECK: Relocations = None +; CHECK: } +; CHECK: ] +; CHECK: Symbols = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 2 = { +; CHECK: Name = _main +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 3 = { +; CHECK: Name = L_.str +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 4 = { +; CHECK: Name = _printf +; CHECK: Value = 0 +; CHECK: SectionNumber = 0 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: ] +; CHECK: } diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp new file mode 100644 index 000000000000..7b7bd4e73807 --- /dev/null +++ b/test/MC/COFF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/MC/COFF/switch-relocations.ll b/test/MC/COFF/switch-relocations.ll new file mode 100644 index 000000000000..300c10732ec6 --- /dev/null +++ b/test/MC/COFF/switch-relocations.ll @@ -0,0 +1,34 @@ +; The purpose of this test is to see if the COFF object writer can properly +; relax the fixups that are created for jump tables on x86-64. See PR7960. + +; This test case was reduced from Lua/lapi.c. + +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +define void @lua_gc(i32 %what) nounwind { +entry: + switch i32 %what, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb14 + i32 4, label %sw.bb18 + i32 6, label %sw.bb57 + ] + +sw.bb: ; preds = %entry, %entry, %entry + ret void + +sw.bb14: ; preds = %entry + ret void + +sw.bb18: ; preds = %entry + ret void + +sw.bb57: ; preds = %entry + ret void + +sw.epilog: ; preds = %entry + ret void +} diff --git a/test/MC/COFF/symbol-fragment-offset.ll b/test/MC/COFF/symbol-fragment-offset.ll new file mode 100644 index 000000000000..af7ace19d59f --- /dev/null +++ b/test/MC/COFF/symbol-fragment-offset.ll @@ -0,0 +1,182 @@ +; The purpose of this test is to see if the COFF object writer is emitting the
+; proper relocations for multiple pieces of data in a single data fragment.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: coff-dump.py %abs_tmp | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+@.str = private constant [7 x i8] c"Hello \00" ; <[7 x i8]*> [#uses=1]
+@str = internal constant [7 x i8] c"World!\00" ; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+; CHECK: {
+; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C)
+; CHECK: NumberOfSections = 2
+; CHECK: TimeDateStamp = {{[0-9]+}}
+; CHECK: PointerToSymbolTable = 0xBB
+; CHECK: NumberOfSymbols = 9
+; CHECK: SizeOfOptionalHeader = 0
+; CHECK: Characteristics = 0x0
+; CHECK: Sections = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 33
+; CHECK: PointerToRawData = 0x64
+; CHECK: PointerToRelocations = 0x85
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 4
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0x60500020
+; CHECK: IMAGE_SCN_CNT_CODE
+; CHECK: IMAGE_SCN_ALIGN_16BYTES
+; CHECK: IMAGE_SCN_MEM_EXECUTE
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: SectionData =
+; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+; CHECK: 04 24 00 00 00 00 E8 00 - 00 00 00 31 C0 83 C4 04 |.$.........1....|
+; CHECK: C3 |.|
+
+; CHECK: Relocations = [
+; CHECK: 0 = {
+; CHECK: VirtualAddress = 0x6
+; CHECK: SymbolTableIndex = 5
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _main
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: VirtualAddress = 0xB
+; CHECK: SymbolTableIndex = 6
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = L_.str
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: VirtualAddress = 0x12
+; CHECK: SymbolTableIndex = 7
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _printf
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: VirtualAddress = 0x17
+; CHECK: SymbolTableIndex = 8
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = _str
+; CHECK: }
+; CHECK: ]
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 14
+; CHECK: PointerToRawData = 0xAD
+; CHECK: PointerToRelocations = 0x0
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 0
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0xC0100040
+; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: IMAGE_SCN_MEM_WRITE
+; CHECK: SectionData =
+; CHECK: 48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 |Hello .World!.|
+
+; CHECK: Relocations = None
+; CHECK: }
+; CHECK: ]
+; CHECK: Symbols = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 21 00 00 00 04 00 00 00 - 00 00 00 00 01 00 00 00 |!...............|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 0E 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: Name = _main
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: Name = L_.str
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 4 = {
+; CHECK: Name = _printf
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 5 = {
+; CHECK: Name = _str
+; CHECK: Value = 7
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 6 = {
+; CHECK: Name = _puts
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: ]
+; CHECK: }
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt index a1e229caebf8..0b4c2978fe3d 100644 --- a/test/MC/Disassembler/arm-tests.txt +++ b/test/MC/Disassembler/arm-tests.txt @@ -12,9 +12,21 @@ # CHECK: cmn r0, #1 0x01 0x00 0x70 0xe3 +# CHECK: dmb +0x5f 0xf0 0x7f 0xf5 + # CHECK: dmb nshst 0x56 0xf0 0x7f 0xf5 +# CHECK: dsb +0x4f 0xf0 0x7f 0xf5 + +# CHECK: dsb st +0x4e 0xf0 0x7f 0xf5 + +# CHECK: isb +0x6f 0xf0 0x7f 0xf5 + # CHECK: ldclvc p5, cr15, [r8], #-0 0x00 0xf5 0x78 0x7c @@ -42,9 +54,17 @@ # CHECK: mvnpls r7, #245, 2 0xf5 0x71 0xf0 0x53 +# CHECK-NOT: orr r7, r8, r7, rrx #0 +# CHECK: orr r7, r8, r7, rrx +0x67 0x70 0x88 0xe1 + # CHECK: pkhbt r8, r9, r10, lsl #4 0x1a 0x82 0x89 0xe6 +# CHECK-NOT: pkhbtls pc, r11, r11, lsl #0 +# CHECK: pkhbtls pc, r11, r11 +0x1b 0xf0 0x8b 0x96 + # CHECK: pop {r0, r2, r4, r6, r8, r10} 0x55 0x05 0xbd 0xe8 @@ -57,6 +77,14 @@ # CHECK: rfedb r0! 0x00 0x0a 0x30 0xf9 +# CHECK-NOT: rsbeq r0, r2, r0, lsl #0 +# CHECK: rsbeq r0, r2, r0 +0x00 0x00 0x62 0x00 + +# CHECK-NOT: rsceqs r0, r0, r1, lsl #0 +# CHECK: rsceqs r0, r0, r1 +0x01 0x00 0xf0 0x00 + # CHECK: sbcs r0, pc, #1 0x01 0x00 0xdf 0xe2 @@ -66,6 +94,10 @@ # CHECK: ssat r8, #1, r10, lsl #8 0x1a 0x84 0xa0 0xe6 +# CHECK-NOT: ssatmi r0, #17, r12, lsl #0 +# CHECK: ssatmi r0, #17, r12 +0x1c 0x00 0xb0 0x46 + # CHECK: stmdb r10!, {r4, r5, r6, r7, lr} 0xf0 0x40 0x2a 0xe9 @@ -75,3 +107,5 @@ # CHECK: ubfx r0, r0, #16, #1 0x50 0x08 0xe0 0xe7 +# CHECK: usat r8, #0, r10, asr #32 +0x5a 0x80 0xe0 0xe6 diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt index 51b31e7c1a6e..826ff2272efa 100644 --- a/test/MC/Disassembler/neon-tests.txt +++ b/test/MC/Disassembler/neon-tests.txt @@ -25,6 +25,9 @@ # CHECK: vmov.i64 q6, #0xFF00FF00FF 0x75 0xce 0x81 0xf2 +# CHECK: vmvn.i32 d0, #0x0 +0x30 0x00 0x80 0xf2 + # CHECK: vmul.f32 d0, d0, d6 0x16 0x0d 0x00 0xf3 diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt index 14e91295276b..06d12fed87fb 100644 --- a/test/MC/Disassembler/thumb-tests.txt +++ b/test/MC/Disassembler/thumb-tests.txt @@ -42,6 +42,10 @@ # CHECK: pkhtb r2, r4, r6, asr #16 0xc4 0xea 0x26 0x42 +# CHECK-NOT: pkhbt r2, r4, r6, lsl #0 +# CHECK: pkhbt r2, r4, r6 +0xc4 0xea 0x06 0x02 + # CHECK: pop {r2, r4, r6, r8, r10, r12} 0xbd 0xe8 0x54 0x15 @@ -51,6 +55,14 @@ # CHECK: rsbs r0, r0, #0 0x40 0x42 +# CHECK-NOT: rsb r0, r2, r0, lsl #0 +# CHECK: rsb r0, r2, r0 +0xc2 0xeb 0x00 0x00 + +# CHECK-NOT: ssat r0, #17, r12, lsl #0 +# CHECK: ssat r0, #17, r12 +0x0c 0xf3 0x10 0x00 + # CHECK: strd r0, [r7, #64] 0xc7 0xe9 0x10 0x01 diff --git a/test/MC/ELF/bss.ll b/test/MC/ELF/bss.ll new file mode 100644 index 000000000000..5112d2c9b0a5 --- /dev/null +++ b/test/MC/ELF/bss.ll @@ -0,0 +1,8 @@ +; RUN: llc -filetype=obj %s -o %t +; FIXME: Add ELF dumping tool to check results. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@g0 = global i8* null, align 4 ; <i8**> [#uses=0] + diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp new file mode 100644 index 000000000000..7b7bd4e73807 --- /dev/null +++ b/test/MC/ELF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/Makefile b/test/Makefile index f6830e638393..7ca46beccc3f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,11 +10,11 @@ LEVEL = .. DIRS = -# -# Make Dejagnu the default for testing -# all:: check-local +# 'lit' is the default test runner. +check-local:: check-local-lit + # Include other test rules include Makefile.tests @@ -84,18 +84,18 @@ else # !SunOS ifeq ($(HOST_OS),AuroraUX) ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; else # !AuroraUX -# Fedora 13 x86-64 python fails with -v 51200 -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 768000 ; +# Fedora 13 x86-64 python fails with -v 76800 +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ; endif # AuroraUX endif # SunOS ifneq ($(RUNTEST),) -check-local:: site.exp +check-local-dg:: site.exp ( $(ULIMIT) \ PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \ $(RUNTEST) $(RUNTESTFLAGS) ) else -check-local:: site.exp +check-local-dg:: site.exp @echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM." endif @@ -107,26 +107,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs ( $(ULIMIT) \ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) ) -ifdef TESTONE -CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE)) -CLEANED_TESTONE := $(patsubst test/%,%,$(CLEANED_TESTONE)) -SUBDIR := $(shell dirname $(CLEANED_TESTONE)) -TESTPATH := $(LLVM_SRC_ROOT)/test/$(CLEANED_TESTONE) -check-one: site.exp $(TCLSH) - $(Verb)( echo "source $(LLVM_OBJ_ROOT)/test/site.exp" ; \ - echo "set subdir $(SUBDIR)" ; \ - echo "proc pass { msg } { puts \"PASS: \$$msg\" } "; \ - echo "proc fail { msg } { puts \"FAIL: \$$msg\" }" ; \ - echo "proc xfail { msg } { puts \"XFAIL: \$$msg\" }" ; \ - echo "proc xpass { msg } { puts \"XPASS: \$$msg\" }" ; \ - echo "proc verbose args { }" ; \ - echo "source $(LLVM_SRC_ROOT)/test/lib/llvm.exp" ; \ - echo "RunLLVMTests $(TESTPATH)" ) | \ - ( $(ULIMIT) \ - PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \ - $(TCLSH) ) -endif - clean:: $(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print` @@ -166,7 +146,7 @@ site.exp: FORCE @echo 'set gccpath "$(CC)"' >>site.tmp @echo 'set gxxpath "$(CXX)"' >>site.tmp @echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp - @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >> site.tmp + @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp @echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp @echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @@ -203,6 +183,3 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \ $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@ -# Daniel hates Chris. -chris-lit: - make check-lit LIT_ARGS='-j16 -s' diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll new file mode 100644 index 000000000000..60fab3df0d9f --- /dev/null +++ b/test/Other/close-stderr.ll @@ -0,0 +1,9 @@ +; RUN: sh -c "\ +; RUN: opt --reject-this-option 2>&-; echo \$?; \ +; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \ +; RUN: " | FileCheck %s +; CHECK: {{^1$}} +; CHECK: {{^0$}} + +; Test that the error handling when writing to stderr fails exits the +; program cleanly rather than aborting. diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll index ecef9c48492e..926bdbc1b464 100644 --- a/test/Other/constant-fold-gep.ll +++ b/test/Other/constant-fold-gep.ll @@ -71,8 +71,6 @@ ; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; PLAIN: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; PLAIN: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) @@ -82,8 +80,6 @@ ; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; OPT: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; TO: @a = constant i64 18480 ; TO: @b = constant i64 8 ; TO: @c = constant i64 16 @@ -93,8 +89,6 @@ ; TO: @g = constant i64 8 ; TO: @h = constant i64 8 ; TO: @i = constant i64 8 -; TO: @j = constant i64 8 -; TO: @k = constant i64 8 @a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) @b = constant i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64) @@ -105,8 +99,6 @@ @g = constant i64 ptrtoint ({double, double}* getelementptr ({i1, {double, double}}* null, i64 0, i32 1) to i64) @h = constant i64 ptrtoint (double** getelementptr (double** null, i64 1) to i64) @i = constant i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) -@j = constant i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) -@k = constant i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) ; The target-dependent folder should cast GEP indices to integer-sized pointers. @@ -275,14 +267,6 @@ define i1* @hoo1() nounwind { ; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; PLAIN: ret i64 %t ; PLAIN: } -; PLAIN: define i64 @fj() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } -; PLAIN: define i64 @fk() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } ; OPT: define i64 @fa() nounwind { ; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: } @@ -310,12 +294,6 @@ define i1* @hoo1() nounwind { ; OPT: define i64 @fi() nounwind { ; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) ; OPT: } -; OPT: define i64 @fj() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: } -; OPT: define i64 @fk() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) -; OPT: } ; TO: define i64 @fa() nounwind { ; TO: ret i64 18480 ; TO: } @@ -343,12 +321,6 @@ define i1* @hoo1() nounwind { ; TO: define i64 @fi() nounwind { ; TO: ret i64 8 ; TO: } -; TO: define i64 @fj() nounwind { -; TO: ret i64 8 -; TO: } -; TO: define i64 @fk() nounwind { -; TO: ret i64 8 -; TO: } ; SCEV: Classifying expressions for: @fa ; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64 ; SCEV: --> (2310 * sizeof(double)) @@ -376,12 +348,6 @@ define i1* @hoo1() nounwind { ; SCEV: Classifying expressions for: @fi ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; SCEV: --> alignof(i1*) -; SCEV: Classifying expressions for: @fj -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) -; SCEV: Classifying expressions for: @fk -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; SCEV: --> sizeof(double) define i64 @fa() nounwind { %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 @@ -419,14 +385,6 @@ define i64 @fi() nounwind { %t = bitcast i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) to i64 ret i64 %t } -define i64 @fj() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fk() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) to i64 - ret i64 %t -} ; PLAIN: define i64* @fM() nounwind { ; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/inline-asm-newline-terminator.ll index f6cc5c1fb421..af93cc0dd2aa 100644 --- a/test/Other/inline-asm-newline-terminator.ll +++ b/test/Other/inline-asm-newline-terminator.ll @@ -1,5 +1,4 @@ ; RUN: llc -filetype=obj -o - < %s -; XFAIL: vg_leak ; ModuleID = 't.c' target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/Other/lint.ll b/test/Other/lint.ll index dee3d11d2fb5..fcef7ee2d571 100644 --- a/test/Other/lint.ll +++ b/test/Other/lint.ll @@ -161,5 +161,7 @@ declare i32 @nonstruct_callee() nounwind define void @struct_caller() nounwind { entry: call %struct bitcast (i32 ()* @foo to %struct ()*)() - ret void + + ; CHECK: Undefined behavior: indirectbr with no destinations + indirectbr i8* null, [] } diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py new file mode 100755 index 000000000000..0af3d368d5d0 --- /dev/null +++ b/test/Scripts/coff-dump.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python +#===-- coff-dump.py - COFF object file dump utility-------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +# +# COFF File Definition +# + +def string_table_entry (offset): + return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s')) + +def secname(value): + if value[0] == '/': + return string_table_entry (value [1:].rstrip('\0')) + else: + return '%s' + +def symname(value): + parts = struct.unpack("<2L", value) + if parts [0] == 0: + return string_table_entry (parts [1]) + else: + return '%s' + +file = ('struct', [ + ('MachineType', ('enum', '<H', '0x%X', { + 0x0: 'IMAGE_FILE_MACHINE_UNKNOWN', + 0x1d3: 'IMAGE_FILE_MACHINE_AM33', + 0x8664: 'IMAGE_FILE_MACHINE_AMD64', + 0x1c0: 'IMAGE_FILE_MACHINE_ARM', + 0xebc: 'IMAGE_FILE_MACHINE_EBC', + 0x14c: 'IMAGE_FILE_MACHINE_I386', + 0x200: 'IMAGE_FILE_MACHINE_IA64', + 0x904: 'IMAGE_FILE_MACHINE_M32R', + 0x266: 'IMAGE_FILE_MACHINE_MIPS16', + 0x366: 'IMAGE_FILE_MACHINE_MIPSFPU', + 0x466: 'IMAGE_FILE_MACHINE_MIPSFPU16', + 0x1f0: 'IMAGE_FILE_MACHINE_POWERPC', + 0x1f1: 'IMAGE_FILE_MACHINE_POWERPCFP', + 0x166: 'IMAGE_FILE_MACHINE_R4000', + 0x1a2: 'IMAGE_FILE_MACHINE_SH3', + 0x1a3: 'IMAGE_FILE_MACHINE_SH3DSP', + 0x1a6: 'IMAGE_FILE_MACHINE_SH4', + 0x1a8: 'IMAGE_FILE_MACHINE_SH5', + 0x1c2: 'IMAGE_FILE_MACHINE_THUMB', + 0x169: 'IMAGE_FILE_MACHINE_WCEMIPSV2', + })), + ('NumberOfSections', ('scalar', '<H', '%d')), + ('TimeDateStamp', ('scalar', '<L', '%d')), + ('PointerToSymbolTable', ('scalar', '<L', '0x%0X')), + ('NumberOfSymbols', ('scalar', '<L', '%d')), + ('SizeOfOptionalHeader', ('scalar', '<H', '%d')), + ('Characteristics', ('flags', '<H', '0x%x', [ + (0x0001, 'IMAGE_FILE_RELOCS_STRIPPED', ), + (0x0002, 'IMAGE_FILE_EXECUTABLE_IMAGE', ), + (0x0004, 'IMAGE_FILE_LINE_NUMS_STRIPPED', ), + (0x0008, 'IMAGE_FILE_LOCAL_SYMS_STRIPPED', ), + (0x0010, 'IMAGE_FILE_AGGRESSIVE_WS_TRIM', ), + (0x0020, 'IMAGE_FILE_LARGE_ADDRESS_AWARE', ), + (0x0080, 'IMAGE_FILE_BYTES_REVERSED_LO', ), + (0x0100, 'IMAGE_FILE_32BIT_MACHINE', ), + (0x0200, 'IMAGE_FILE_DEBUG_STRIPPED', ), + (0x0400, 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ), + (0x0800, 'IMAGE_FILE_NET_RUN_FROM_SWAP', ), + (0x1000, 'IMAGE_FILE_SYSTEM', ), + (0x2000, 'IMAGE_FILE_DLL', ), + (0x4000, 'IMAGE_FILE_UP_SYSTEM_ONLY', ), + (0x8000, 'IMAGE_FILE_BYTES_REVERSED_HI', ), + ])), + ('Sections', ('array', 'NumberOfSections', ('struct', [ + ('Name', ('scalar', '<8s', secname)), + ('VirtualSize', ('scalar', '<L', '%d' )), + ('VirtualAddress', ('scalar', '<L', '%d' )), + ('SizeOfRawData', ('scalar', '<L', '%d' )), + ('PointerToRawData', ('scalar', '<L', '0x%X' )), + ('PointerToRelocations', ('scalar', '<L', '0x%X' )), + ('PointerToLineNumbers', ('scalar', '<L', '0x%X' )), + ('NumberOfRelocations', ('scalar', '<H', '%d' )), + ('NumberOfLineNumbers', ('scalar', '<H', '%d' )), + ('Charateristics', ('flags', '<L', '0x%X', [ + (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'), + (0x00000020, 'IMAGE_SCN_CNT_CODE'), + (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'), + (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'), + (0x00000100, 'IMAGE_SCN_LNK_OTHER'), + (0x00000200, 'IMAGE_SCN_LNK_INFO'), + (0x00000800, 'IMAGE_SCN_LNK_REMOVE'), + (0x00001000, 'IMAGE_SCN_LNK_COMDAT'), + (0x00008000, 'IMAGE_SCN_GPREL'), + (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'), + (0x00020000, 'IMAGE_SCN_MEM_16BIT'), + (0x00040000, 'IMAGE_SCN_MEM_LOCKED'), + (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'), + (0x00F00000, 'IMAGE_SCN_ALIGN', { + 0x00100000: 'IMAGE_SCN_ALIGN_1BYTES', + 0x00200000: 'IMAGE_SCN_ALIGN_2BYTES', + 0x00300000: 'IMAGE_SCN_ALIGN_4BYTES', + 0x00400000: 'IMAGE_SCN_ALIGN_8BYTES', + 0x00500000: 'IMAGE_SCN_ALIGN_16BYTES', + 0x00600000: 'IMAGE_SCN_ALIGN_32BYTES', + 0x00700000: 'IMAGE_SCN_ALIGN_64BYTES', + 0x00800000: 'IMAGE_SCN_ALIGN_128BYTES', + 0x00900000: 'IMAGE_SCN_ALIGN_256BYTES', + 0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES', + 0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES', + 0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES', + 0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES', + 0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES', + }), + (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'), + (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'), + (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'), + (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'), + (0x10000000, 'IMAGE_SCN_MEM_SHARED'), + (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'), + (0x40000000, 'IMAGE_SCN_MEM_READ'), + (0x80000000, 'IMAGE_SCN_MEM_WRITE'), + ])), + ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))), + ('Relocations', ('ptr', 'PointerToRelocations', ('array', 'NumberOfRelocations', ('struct', [ + ('VirtualAddress', ('scalar', '<L', '0x%X')), + ('SymbolTableIndex', ('scalar', '<L', '%d' )), + ('Type', ('enum', '<H', '%d', ('MachineType', { + 0x14c: { + 0x0000: 'IMAGE_REL_I386_ABSOLUTE', + 0x0001: 'IMAGE_REL_I386_DIR16', + 0x0002: 'IMAGE_REL_I386_REL16', + 0x0006: 'IMAGE_REL_I386_DIR32', + 0x0007: 'IMAGE_REL_I386_DIR32NB', + 0x0009: 'IMAGE_REL_I386_SEG12', + 0x000A: 'IMAGE_REL_I386_SECTION', + 0x000B: 'IMAGE_REL_I386_SECREL', + 0x000C: 'IMAGE_REL_I386_TOKEN', + 0x000D: 'IMAGE_REL_I386_SECREL7', + 0x0014: 'IMAGE_REL_I386_REL32', + }, + 0x8664: { + 0x0000: 'IMAGE_REL_AMD64_ABSOLUTE', + 0x0001: 'IMAGE_REL_AMD64_ADDR64', + 0x0002: 'IMAGE_REL_AMD64_ADDR32', + 0x0003: 'IMAGE_REL_AMD64_ADDR32NB', + 0x0004: 'IMAGE_REL_AMD64_REL32', + 0x0005: 'IMAGE_REL_AMD64_REL32_1', + 0x0006: 'IMAGE_REL_AMD64_REL32_2', + 0x0007: 'IMAGE_REL_AMD64_REL32_3', + 0x0008: 'IMAGE_REL_AMD64_REL32_4', + 0x0009: 'IMAGE_REL_AMD64_REL32_5', + 0x000A: 'IMAGE_REL_AMD64_SECTION', + 0x000B: 'IMAGE_REL_AMD64_SECREL', + 0x000C: 'IMAGE_REL_AMD64_SECREL7', + 0x000D: 'IMAGE_REL_AMD64_TOKEN', + 0x000E: 'IMAGE_REL_AMD64_SREL32', + 0x000F: 'IMAGE_REL_AMD64_PAIR', + 0x0010: 'IMAGE_REL_AMD64_SSPAN32', + }, + }))), + ('SymbolName', ('ptr', '+ PointerToSymbolTable * - SymbolTableIndex 1 18', ('scalar', '<8s', symname))) + ])))), + ]))), + ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '* NumberOfSymbols 18', ('struct', [ + ('Name', ('scalar', '<8s', symname)), + ('Value', ('scalar', '<L', '%d' )), + ('SectionNumber', ('scalar', '<H', '%d' )), + ('SimpleType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_TYPE_NULL', + 1: 'IMAGE_SYM_TYPE_VOID', + 2: 'IMAGE_SYM_TYPE_CHAR', + 3: 'IMAGE_SYM_TYPE_SHORT', + 4: 'IMAGE_SYM_TYPE_INT', + 5: 'IMAGE_SYM_TYPE_LONG', + 6: 'IMAGE_SYM_TYPE_FLOAT', + 7: 'IMAGE_SYM_TYPE_DOUBLE', + 8: 'IMAGE_SYM_TYPE_STRUCT', + 9: 'IMAGE_SYM_TYPE_UNION', + 10: 'IMAGE_SYM_TYPE_ENUM', + 11: 'IMAGE_SYM_TYPE_MOE', + 12: 'IMAGE_SYM_TYPE_BYTE', + 13: 'IMAGE_SYM_TYPE_WORD', + 14: 'IMAGE_SYM_TYPE_UINT', + 15: 'IMAGE_SYM_TYPE_DWORD', + })), + ('ComplexType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_DTYPE_NULL', + 1: 'IMAGE_SYM_DTYPE_POINTER', + 2: 'IMAGE_SYM_DTYPE_FUNCTION', + 3: 'IMAGE_SYM_DTYPE_ARRAY', + })), + ('StorageClass', ('enum', '<B', '%d', { + -1: 'IMAGE_SYM_CLASS_END_OF_FUNCTION', + 0: 'IMAGE_SYM_CLASS_NULL', + 1: 'IMAGE_SYM_CLASS_AUTOMATIC', + 2: 'IMAGE_SYM_CLASS_EXTERNAL', + 3: 'IMAGE_SYM_CLASS_STATIC', + 4: 'IMAGE_SYM_CLASS_REGISTER', + 5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF', + 6: 'IMAGE_SYM_CLASS_LABEL', + 7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL', + 8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT', + 9: 'IMAGE_SYM_CLASS_ARGUMENT', + 10: 'IMAGE_SYM_CLASS_STRUCT_TAG', + 11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION', + 12: 'IMAGE_SYM_CLASS_UNION_TAG', + 13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION', + 14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC', + 15: 'IMAGE_SYM_CLASS_ENUM_TAG', + 16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM', + 17: 'IMAGE_SYM_CLASS_REGISTER_PARAM', + 18: 'IMAGE_SYM_CLASS_BIT_FIELD', + 100: 'IMAGE_SYM_CLASS_BLOCK', + 101: 'IMAGE_SYM_CLASS_FUNCTION', + 102: 'IMAGE_SYM_CLASS_END_OF_STRUCT', + 103: 'IMAGE_SYM_CLASS_FILE', + 104: 'IMAGE_SYM_CLASS_SECTION', + 105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL', + 107: 'IMAGE_SYM_CLASS_CLR_TOKEN', + })), + ('NumberOfAuxSymbols', ('scalar', '<B', '%d' )), + ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')), + ])))), +]) + +# +# Definition Interpreter +# + +import sys, types, struct, re + +Input = None +Stack = [] +Fields = {} + +Indent = 0 +NewLine = True + +def indent(): + global Indent + Indent += 1 + +def dedent(): + global Indent + Indent -= 1 + +def write(input): + global NewLine + output = "" + + for char in input: + + if NewLine: + output += Indent * ' ' + NewLine = False + + output += char + + if char == '\n': + NewLine = True + + sys.stdout.write (output) + +def read(format): + return struct.unpack (format, Input.read(struct.calcsize(format))) + +def read_cstr (): + output = "" + while True: + char = Input.read (1) + if len (char) == 0: + raise RuntimeError ("EOF while reading cstr") + if char == '\0': + break + output += char + return output + +def push_pos(seek_to = None): + Stack [0:0] = [Input.tell ()] + if seek_to: + Input.seek (seek_to) + +def pop_pos(): + assert(len (Stack) > 0) + Input.seek (Stack [0]) + del Stack [0] + +def print_binary_data(size): + value = "" + while size > 0: + if size >= 16: + data = Input.read(16) + size -= 16 + else: + data = Input.read(size) + size = 0 + value += data + bytes = "" + text = "" + for index in xrange (16): + if index < len (data): + if index == 8: + bytes += "- " + ch = ord (data [index]) + bytes += "%02X " % ch + if ch >= 0x20 and ch <= 0x7F: + text += data [index] + else: + text += "." + else: + if index == 8: + bytes += " " + bytes += " " + + write ("%s|%s|\n" % (bytes, text)) + return value + +idlit = re.compile ("[a-zA-Z][a-zA-Z0-9_-]*") +numlit = re.compile ("[0-9]+") + +def read_value(expr): + + input = iter (expr.split ()) + + def eval(): + + token = input.next () + + if expr == 'cstr': + return read_cstr () + if expr == 'true': + return True + if expr == 'false': + return False + + if len (token) > 1 and token [0] in ('=', '@', '<', '!', '>'): + val = read(expr) + assert (len (val) == 1) + return val [0] + + if token == '+': + return eval () + eval () + if token == '-': + return eval () - eval () + if token == '*': + return eval () * eval () + if token == '/': + return eval () / eval () + + if idlit.match (token): + return Fields [token] + if numlit.match (token): + return int (token) + + raise RuntimeError ("unexpected token %s" % repr(token)) + + value = eval () + + try: + input.next () + except StopIteration: + return value + raise RuntimeError("unexpected input at end of expression") + +def write_value(format,value): + format_type = type (format) + if format_type is types.StringType: + write (format%value) + elif format_type is types.FunctionType: + write_value (format (value), value) + elif format_type is types.TupleType: + Fields ['this'] = value + handle_element (format) + else: + raise RuntimeError("unexpected type: %s" % repr(format_type)) + +def handle_scalar(entry): + iformat = entry [1] + oformat = entry [2] + + value = read_value (iformat) + + write_value (oformat, value) + + return value + +def handle_enum(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + if type (definitions) is types.TupleType: + selector = read_value (definitions [0]) + definitions = definitions [1] [selector] + + if value in definitions: + description = definitions[value] + else: + description = "unknown" + + write ("%s (" % description) + write_value (oformat, value) + write (")") + + return value + +def handle_flags(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + write_value (oformat, value) + + indent () + for entry in definitions: + mask = entry [0] + name = entry [1] + if len (entry) == 3: + map = entry [2] + selection = value & mask + if selection in map: + write("\n%s" % map[selection]) + else: + write("\n%s <%d>" % (name, selection)) + elif len (entry) == 2: + if value & mask != 0: + write("\n%s" % name) + dedent () + + return value + +def handle_struct(entry): + global Fields + members = entry [1] + + newFields = {} + + write ("{\n"); + indent () + + for member in members: + name = member [0] + type = member [1] + + write("%s = "%name.ljust(24)) + + value = handle_element(type) + + write("\n") + + Fields [name] = value + newFields [name] = value + + dedent () + write ("}") + + return newFields + +def handle_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + + for index in xrange (value): + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + + dedent () + write ("]") + + return newItems + +def handle_byte_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + end_of_array = Input.tell () + value + + index = 0 + while Input.tell () < end_of_array: + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + index += 1 + + dedent () + write ("]") + + return newItems + +def handle_ptr(entry): + offset = entry[1] + element = entry [2] + + value = None + offset = read_value (offset) + + if offset != 0: + + push_pos (offset) + + value = handle_element (element) + + pop_pos () + + else: + write ("None") + + return value + +def handle_blob(entry): + length = entry [1] + + write ("\n") + indent () + + value = print_binary_data (read_value (length)) + + dedent () + + return value + +def handle_element(entry): + handlers = { + 'struct': handle_struct, + 'scalar': handle_scalar, + 'enum': handle_enum, + 'flags': handle_flags, + 'ptr': handle_ptr, + 'blob': handle_blob, + 'array': handle_array, + 'byte-array': handle_byte_array, + } + + if not entry [0] in handlers: + raise RuntimeError ("unexpected type '%s'" % str (entry[0])) + + return handlers [entry [0]] (entry) + +Input = open (sys.argv [1], "rb") +try: + handle_element (file) +finally: + Input.close () + Input = None diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat new file mode 100644 index 000000000000..cc83eba1c446 --- /dev/null +++ b/test/Scripts/coff-dump.py.bat @@ -0,0 +1,4 @@ +@echo off + +%PYTHON_EXECUTABLE% %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9 + diff --git a/test/TableGen/FieldAccess.td b/test/TableGen/FieldAccess.td new file mode 100644 index 000000000000..ad652e79ea7c --- /dev/null +++ b/test/TableGen/FieldAccess.td @@ -0,0 +1,14 @@ +// RUN: tblgen %s +class Bla<string t> +{ + string blu = t; +} + +class Bli<Bla t> +{ + Bla bla = t; +} + +def a : Bli<Bla<"">>; +def b : Bla<!cast<Bla>(a.bla).blu>; // works +def c : Bla<a.bla.blu>; // doesn't work: Cannot access field 'blu' of value 'a.bla' diff --git a/test/TableGen/ListManip.td b/test/TableGen/ListManip.td new file mode 100644 index 000000000000..c221bb1335b6 --- /dev/null +++ b/test/TableGen/ListManip.td @@ -0,0 +1,10 @@ +// RUN: tblgen %s +class Bli<string _t> +{ + string t = _t; +} + +class Bla<list<Bli> _bli> +: Bli<!car(_bli).t> +{ +} diff --git a/test/TestRunner.sh b/test/TestRunner.sh index 4f04d81aac64..ab50856af11f 100755 --- a/test/TestRunner.sh +++ b/test/TestRunner.sh @@ -1,36 +1,5 @@ #!/bin/sh -# -# TestRunner.sh - This script is used to run the deja-gnu tests exactly like -# deja-gnu does, by executing the Tcl script specified in the test case's -# RUN: lines. This is made possible by a simple make target supported by the -# test/Makefile. All this script does is invoke that make target. -# -# Usage: -# TestRunner.sh {script_names} -# -# This script is typically used by cd'ing to a test directory and then -# running TestRunner.sh with a list of test file names you want to run. -# -TESTPATH=`pwd` -SUBDIR="" -if test `dirname $1` = "." ; then - while test `basename $TESTPATH` != "test" -a ! -z "$TESTPATH" ; do - tmp=`basename $TESTPATH` - SUBDIR="$tmp/$SUBDIR" - TESTPATH=`dirname $TESTPATH` - done -fi +# Deprecated, use 'llvm-lit'. -for TESTFILE in "$@" ; do - if test `dirname $TESTFILE` = . ; then - if test -d "$TESTPATH" ; then - cd $TESTPATH - make check-one TESTONE="$SUBDIR$TESTFILE" - cd $PWD - else - echo "Can't find llvm/test directory in " `pwd` - fi - else - make check-one TESTONE=$TESTFILE - fi -done +echo "warning: '$0' is deprecated, use 'llvm-lit' instead." +exec llvm-lit "$@" diff --git a/test/Transforms/ABCD/basic.ll b/test/Transforms/ABCD/basic.ll deleted file mode 100644 index f2ce1b9aa738..000000000000 --- a/test/Transforms/ABCD/basic.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -abcd -S | FileCheck %s - -define void @test() { -; CHECK: @test -; CHECK-NOT: br i1 %tmp95 -; CHECK: ret void -entry: - br label %bb19 - -bb: - br label %bb1 - -bb1: - %tmp7 = icmp sgt i32 %tmp94, 1 - br i1 %tmp7, label %bb.i.i, label %return - -bb.i.i: - br label %return - -bb19: - %tmp94 = ashr i32 undef, 3 - %tmp95 = icmp sgt i32 %tmp94, 16 - br i1 %tmp95, label %bb, label %return - -return: - ret void -} diff --git a/test/Transforms/ConstProp/constant-expr.ll b/test/Transforms/ConstProp/constant-expr.ll index 996303293d18..556ed1f652af 100644 --- a/test/Transforms/ConstProp/constant-expr.ll +++ b/test/Transforms/ConstProp/constant-expr.ll @@ -16,9 +16,9 @@ @E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y) @F = global i1 srem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @F = global i1 false ; <i1*> [#uses=0] +; CHECK: @F = global i1 false @G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @G = global i1 false ; <i1*> [#uses=0] +; CHECK: @G = global i1 false @H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*)) ; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y) diff --git a/test/Transforms/ConstantMerge/dont-merge.ll b/test/Transforms/ConstantMerge/dont-merge.ll index 877cf8dc6710..e5337dff27df 100644 --- a/test/Transforms/ConstantMerge/dont-merge.ll +++ b/test/Transforms/ConstantMerge/dont-merge.ll @@ -28,3 +28,17 @@ define void @test2(i32** %P1, i32 addrspace(30)** %P2) { store i32 addrspace(30)* @T2b, i32 addrspace(30)** %P2 ret void } + +; PR8144 - Don't merge globals marked attribute(used) +; CHECK: @T3A = +; CHECK: @T3B = + +@T3A = internal constant i32 0 +@T3B = internal constant i32 0 +@llvm.used = appending global [2 x i32*] [i32* @T3A, i32* @T3B], section +"llvm.metadata" + +define void @test3() { + call void asm sideeffect "T3A, T3B",""() ; invisible use of T3A and T3B + ret void +} diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll new file mode 100644 index 000000000000..fef5b8579eb5 --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll @@ -0,0 +1,25 @@ +; RUN: opt -S < %s -correlated-propagation | FileCheck %s + +; CHECK: @test +define i16 @test(i32 %a, i1 %b) { +entry: + %c = icmp eq i32 %a, 0 + br i1 %c, label %left, label %right + +right: + %d = trunc i32 %a to i1 + br label %merge + +left: + br i1 %b, label %merge, label %other + +other: + ret i16 23 + +merge: + %f = phi i1 [%b, %left], [%d, %right] +; CHECK: select i1 %f, i16 1, i16 0 + %h = select i1 %f, i16 1, i16 0 +; CHECK: ret i16 %h + ret i16 %h +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll new file mode 100644 index 000000000000..24666e901e9e --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/basic.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s +; PR2581 + +; CHECK: @test1 +define i32 @test1(i1 %C) nounwind { + br i1 %C, label %exit, label %body + +body: ; preds = %0 +; CHECK-NOT: select + %A = select i1 %C, i32 10, i32 11 ; <i32> [#uses=1] +; CHECK: ret i32 11 + ret i32 %A + +exit: ; preds = %0 +; CHECK: ret i32 10 + ret i32 10 +} + +; PR4420 +declare i1 @ext() +; CHECK: @test2 +define i1 @test2() { +entry: + %cond = tail call i1 @ext() ; <i1> [#uses=2] + br i1 %cond, label %bb1, label %bb2 + +bb1: ; preds = %entry + %cond2 = tail call i1 @ext() ; <i1> [#uses=1] + br i1 %cond2, label %bb3, label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK-NOT: phi i1 + %cond_merge = phi i1 [ %cond, %entry ], [ false, %bb1 ] ; <i1> [#uses=1] +; CHECK: ret i1 false + ret i1 %cond_merge + +bb3: ; preds = %bb1 + %res = tail call i1 @ext() ; <i1> [#uses=1] +; CHECK: ret i1 %res + ret i1 %res +} + +; PR4855 +@gv = internal constant i8 7 +; CHECK: @test3 +define i8 @test3(i8* %a) nounwind { +entry: + %cond = icmp eq i8* %a, @gv + br i1 %cond, label %bb2, label %bb + +bb: ; preds = %entry + ret i8 0 + +bb2: ; preds = %entry +; CHECK-NOT: load i8* %a + %should_be_const = load i8* %a +; CHECK: ret i8 7 + ret i8 %should_be_const +} + +; PR1757 +; CHECK: @test4 +define i32 @test4(i32) { +EntryBlock: +; CHECK: icmp sgt i32 %0, 2 + %.demorgan = icmp sgt i32 %0, 2 + br i1 %.demorgan, label %GreaterThanTwo, label %LessThanOrEqualToTwo + +GreaterThanTwo: +; CHECK-NOT: icmp eq i32 %0, 2 + icmp eq i32 %0, 2 +; CHECK: br i1 false + br i1 %1, label %Impossible, label %NotTwoAndGreaterThanTwo + +NotTwoAndGreaterThanTwo: + ret i32 2 + +Impossible: + ret i32 1 + +LessThanOrEqualToTwo: + ret i32 0 +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp new file mode 100644 index 000000000000..de42dad163fd --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] diff --git a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll index 641e920006b2..f079108b9bda 100644 --- a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll +++ b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -gvn | llvm-dis ; PR4256 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" %llvm.dbg.anchor.type = type { i32, i32 } %struct.cset = type { i8*, i8, i8, i32, i8* } %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* } diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll index 5e64f807f6ea..390e77a8cea8 100644 --- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll +++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -globalopt -S > %t ; Check that the new global values still have their address space -; RUN: cat %t | grep global.*addrspace +; RUN: cat %t | grep addrspace.*global @struct = internal addrspace(1) global { i32, i32 } zeroinitializer @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll index 701472c059a8..bb1fc84f46f9 100644 --- a/test/Transforms/GlobalOpt/crash.ll +++ b/test/Transforms/GlobalOpt/crash.ll @@ -40,3 +40,18 @@ xx: } declare noalias i8* @malloc(i64) nounwind + + +; PR8063 +@permute_bitrev.bitrev = internal global i32* null, align 8 +define void @permute_bitrev() nounwind { +entry: + %tmp = load i32** @permute_bitrev.bitrev, align 8 + %conv = sext i32 0 to i64 + %mul = mul i64 %conv, 4 + %call = call i8* @malloc(i64 %mul) + %0 = bitcast i8* %call to i32* + store i32* %0, i32** @permute_bitrev.bitrev, align 8 + ret void +} + diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll index f4bab353cd07..bd174a8be3ff 100644 --- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll @@ -21,10 +21,10 @@ define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly { entry: %0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1] %1 = load i32* %0 -; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) ; <i32> [#uses=1] +; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) %2 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1] %3 = load i8* %2 -; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) ; <i8> [#uses=1] +; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) %4 = zext i8 %3 to i32 %5 = add i32 %4, %1 ret i32 %5 diff --git a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll deleted file mode 100644 index c8f97e39bef6..000000000000 --- a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll +++ /dev/null @@ -1,25 +0,0 @@ -; The induction variable canonicalization pass shouldn't leave dead -; instructions laying around! -; -; RUN: opt < %s -indvars -S | \ -; RUN: not grep {#uses=0} - -define i32 @mul(i32 %x, i32 %y) { -entry: - br label %tailrecurse - -tailrecurse: ; preds = %endif, %entry - %accumulator.tr = phi i32 [ %x, %entry ], [ %tmp.9, %endif ] ; <i32> [#uses=2] - %y.tr = phi i32 [ %y, %entry ], [ %tmp.8, %endif ] ; <i32> [#uses=2] - %tmp.1 = icmp eq i32 %y.tr, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif - -endif: ; preds = %tailrecurse - %tmp.8 = add i32 %y.tr, -1 ; <i32> [#uses=1] - %tmp.9 = add i32 %accumulator.tr, %x ; <i32> [#uses=1] - br label %tailrecurse - -return: ; preds = %tailrecurse - ret i32 %accumulator.tr -} - diff --git a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll index d73eee812b30..d211e3b824b2 100644 --- a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll +++ b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -indvars ; PR4258 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" define void @0(i32*, i32*, i32, i32) nounwind { br i1 false, label %bb.nph1.preheader, label %.outer._crit_edge diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll index ab438334c660..516fd8084d9e 100644 --- a/test/Transforms/IndVarSimplify/crash.ll +++ b/test/Transforms/IndVarSimplify/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -indvars %s -disable-output +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" declare i32 @putchar(i8) nounwind @@ -17,3 +18,38 @@ define void @t2(i1* %P) nounwind { ; <label>:6 ; preds = %1 ret void } + +; PR7562 +define void @fannkuch() nounwind { +entry: ; preds = %entry + br label %bb12 + +bb12: ; preds = %bb29, %entry + %i.1 = phi i32 [ undef, %entry ], [ %i.0, %bb29 ] ; <i32> [#uses=2] + %r.1 = phi i32 [ undef, %entry ], [ %r.0, %bb29 ] ; <i32> [#uses=2] + br i1 undef, label %bb13, label %bb24 + +bb13: ; preds = %bb12 + br label %bb24 + +bb24: ; preds = %bb30, %bb13, %bb12 + %i.2 = phi i32 [ %i.1, %bb13 ], [ %i.0, %bb30 ], [ %i.1, %bb12 ] ; <i32> [#uses=1] + %r.0 = phi i32 [ %r.1, %bb13 ], [ %2, %bb30 ], [ %r.1, %bb12 ] ; <i32> [#uses=3] + br label %bb28 + +bb27: ; preds = %bb28 + %0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb28 + +bb28: ; preds = %bb27, %bb26 + %i.0 = phi i32 [ %i.2, %bb24 ], [ %0, %bb27 ] ; <i32> [#uses=4] + %1 = icmp slt i32 %i.0, %r.0 ; <i1> [#uses=1] + br i1 %1, label %bb27, label %bb29 + +bb29: ; preds = %bb28 + br i1 undef, label %bb12, label %bb30 + +bb30: ; preds = %bb29 + %2 = add nsw i32 %r.0, 1 ; <i32> [#uses=1] + br label %bb24 +} diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll index 4ec4acadb4a5..269478a5ed03 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll @@ -5,7 +5,7 @@ ; exit is taken. Indvars should correctly compute the exit values. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-linux-gnu" +target triple = "x86_64-pc-linux-gnu" %struct..0anon = type <{ i8, [3 x i8] }> define i32 @main() nounwind { diff --git a/test/Transforms/IndVarSimplify/uglygep.ll b/test/Transforms/IndVarSimplify/uglygep.ll new file mode 100644 index 000000000000..0014b683db4b --- /dev/null +++ b/test/Transforms/IndVarSimplify/uglygep.ll @@ -0,0 +1,40 @@ +; RUN: opt -indvars -S < %s | not grep uglygep +; rdar://8197217 + +; Indvars should be able to emit a clean GEP here, not an uglygep. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +@numf2s = external global i32 ; <i32*> [#uses=1] +@numf1s = external global i32 ; <i32*> [#uses=1] +@tds = external global double** ; <double***> [#uses=1] + +define void @init_td(i32 %tmp7) nounwind { +entry: + br label %bb4 + +bb4: ; preds = %bb3, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp9, %bb3 ] ; <i32> [#uses=3] + br label %bb + +bb: ; preds = %bb4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %j.0 = phi i32 [ 0, %bb ], [ %tmp6, %bb1 ] ; <i32> [#uses=3] + %tmp8 = icmp slt i32 %j.0, %tmp7 ; <i1> [#uses=1] + br i1 %tmp8, label %bb1, label %bb3 + +bb1: ; preds = %bb2 + %tmp = load double*** @tds, align 8 ; <double**> [#uses=1] + %tmp1 = sext i32 %i.0 to i64 ; <i64> [#uses=1] + %tmp2 = getelementptr inbounds double** %tmp, i64 %tmp1 ; <double**> [#uses=1] + %tmp3 = load double** %tmp2, align 1 ; <double*> [#uses=1] + %tmp6 = add nsw i32 %j.0, 1 ; <i32> [#uses=1] + br label %bb2 + +bb3: ; preds = %bb2 + %tmp9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb4 +} diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll index d8ad5a9864e2..27916b986030 100644 --- a/test/Transforms/InstCombine/align-addr.ll +++ b/test/Transforms/InstCombine/align-addr.ll @@ -1,10 +1,13 @@ -; RUN: opt < %s -instcombine -S | grep {align 16} | count 1 +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Instcombine should be able to prove vector alignment in the ; presence of a few mild address computation tricks. -define void @foo(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { +; CHECK: @test0( +; CHECK: align 16 + +define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { entry: %c = ptrtoint i8* %b to i64 %d = and i64 %c, -16 @@ -29,3 +32,29 @@ return: ret void } +; When we see a unaligned load from an insufficiently aligned global or +; alloca, increase the alignment of the load, turning it into an aligned load. + +; CHECK: @test1( +; CHECK: tmp = load +; CHECK: GLOBAL{{.*}}align 16 + +@GLOBAL = internal global [4 x i32] zeroinitializer + +define <16 x i8> @test1(<2 x i64> %x) { +entry: + %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 + ret <16 x i8> %tmp +} + +; When a load or store lacks an explicit alignment, add one. + +; CHECK: @test2( +; CHECK: load double* %p, align 8 +; CHECK: store double %n, double* %p, align 8 + +define double @test2(double* %p, double %n) nounwind { + %t = load double* %p + store double %n, double* %p + ret double %t +} diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll deleted file mode 100644 index 71512b3a1494..000000000000 --- a/test/Transforms/InstCombine/align-inc.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16} -; RUN: opt < %s -instcombine -S | grep {tmp = load} -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -@GLOBAL = internal global [4 x i32] zeroinitializer - -define <16 x i8> @foo(<2 x i64> %x) { -entry: - %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 - ret <16 x i8> %tmp -} - diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll index f97fb45af161..d774c0972def 100644 --- a/test/Transforms/InstCombine/bit-checks.ll +++ b/test/Transforms/InstCombine/bit-checks.ll @@ -13,3 +13,14 @@ entry: %retval.0 = select i1 %or.cond, i32 2, i32 1 ; <i32> [#uses=1] ret i32 %retval.0 } + +define i32 @main2(i32 %argc, i8** nocapture %argv) nounwind readnone ssp { +entry: + %and = and i32 %argc, 1 ; <i32> [#uses=1] + %tobool = icmp eq i32 %and, 0 ; <i1> [#uses=1] + %and2 = and i32 %argc, 2 ; <i32> [#uses=1] + %tobool3 = icmp eq i32 %and2, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tobool, %tobool3 ; <i1> [#uses=1] + %storemerge = select i1 %or.cond, i32 0, i32 1 ; <i32> [#uses=1] + ret i32 %storemerge +}
\ No newline at end of file diff --git a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll deleted file mode 100644 index 4e9dfbb53b49..000000000000 --- a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 0} -; PR4487 - -; Bitcasts between vectors and scalars are valid, despite being ill-advised. - -define i32 @test(i64 %a) { -bb20: - %t1 = bitcast i64 %a to <2 x i32> - %t2 = bitcast i64 %a to <2 x i32> - %t3 = xor <2 x i32> %t1, %t2 - %t4 = extractelement <2 x i32> %t3, i32 0 - ret i32 %t4 -} - diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll new file mode 100644 index 000000000000..0718b8a3aee0 --- /dev/null +++ b/test/Transforms/InstCombine/bitcast.ll @@ -0,0 +1,105 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Bitcasts between vectors and scalars are valid. +; PR4487 +define i32 @test1(i64 %a) { + %t1 = bitcast i64 %a to <2 x i32> + %t2 = bitcast i64 %a to <2 x i32> + %t3 = xor <2 x i32> %t1, %t2 + %t4 = extractelement <2 x i32> %t3, i32 0 + ret i32 %t4 + +; CHECK: @test1 +; CHECK: ret i32 0 +} + +; Optimize bitcasts that are extracting low element of vector. This happens +; because of SRoA. +; rdar://7892780 +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + +; Optimize bitcasts that are extracting other elements of a vector. This +; happens because of SRoA. +; rdar://7892780 +define float @test3(<2 x float> %A, <2 x i64> %B) { + %tmp28 = bitcast <2 x float> %A to i64 + %tmp29 = lshr i64 %tmp28, 32 + %tmp23 = trunc i64 %tmp29 to i32 + %tmp24 = bitcast i32 %tmp23 to float + + %tmp = bitcast <2 x i64> %B to i128 + %tmp1 = lshr i128 %tmp, 64 + %tmp2 = trunc i128 %tmp1 to i32 + %tmp4 = bitcast i32 %tmp2 to float + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test3 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1 +; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float> +; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + + +define <2 x i32> @test4(i32 %A, i32 %B){ + %tmp38 = zext i32 %A to i64 + %tmp32 = zext i32 %B to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x i32> + ret <2 x i32> %tmp43 + ; CHECK: @test4 + ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0 + ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1 + ; CHECK-NEXT: ret <2 x i32> + +} + +; rdar://8360454 +define <2 x float> @test5(float %A, float %B) { + %tmp37 = bitcast float %A to i32 + %tmp38 = zext i32 %tmp37 to i64 + %tmp31 = bitcast float %B to i32 + %tmp32 = zext i32 %tmp31 to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x float> + ret <2 x float> %tmp43 + ; CHECK: @test5 + ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0 + ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1 + ; CHECK-NEXT: ret <2 x float> +} + +define <2 x float> @test6(float %A){ + %tmp23 = bitcast float %A to i32 ; <i32> [#uses=1] + %tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1] + %tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1] + %mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1] + %tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1] + ret <2 x float> %tmp35 +; CHECK: @test6 +; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1 +; CHECK: ret +} diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 08dcfa731a94..d672d8c1535e 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -10,16 +10,16 @@ declare i32 @llvm.ctlz.i32(i32) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i8 @llvm.ctlz.i8(i8) nounwind readnone -define i8 @test1(i8 %A, i8 %B) { +define i8 @uaddtest1(i8 %A, i8 %B) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B) %y = extractvalue %overflow.result %x, 0 ret i8 %y -; CHECK: @test1 +; CHECK: @uaddtest1 ; CHECK-NEXT: %y = add i8 %A, %B ; CHECK-NEXT: ret i8 %y } -define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) { %and.A = and i8 %A, 127 %and.B = and i8 %B, 127 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %and.A, i8 %and.B) @@ -27,7 +27,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test2 +; CHECK: @uaddtest2 ; CHECK-NEXT: %and.A = and i8 %A, 127 ; CHECK-NEXT: %and.B = and i8 %B, 127 ; CHECK-NEXT: %1 = add nuw i8 %and.A, %and.B @@ -35,7 +35,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) { %or.A = or i8 %A, -128 %or.B = or i8 %B, -128 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %or.A, i8 %or.B) @@ -43,7 +43,7 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test3 +; CHECK: @uaddtest3 ; CHECK-NEXT: %or.A = or i8 %A, -128 ; CHECK-NEXT: %or.B = or i8 %B, -128 ; CHECK-NEXT: %1 = add i8 %or.A, %or.B @@ -51,34 +51,44 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test4(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest4(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 undef, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test4 +; CHECK: @uaddtest4 ; CHECK-NEXT: ret i8 undef } -define i8 @test5(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest5(i8 %A, i1* %overflowPtr) { + %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 0, i8 %A) + %y = extractvalue %overflow.result %x, 0 + %z = extractvalue %overflow.result %x, 1 + store i1 %z, i1* %overflowPtr + ret i8 %y +; CHECK: @uaddtest5 +; CHECK: ret i8 %A +} + +define i8 @umultest1(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test5 +; CHECK: @umultest1 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 0 } -define i8 @test6(i8 %A, i1* %overflowPtr) { +define i8 @umultest2(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 1, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test6 +; CHECK: @umultest2 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 %A } diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll index fc321e968224..c6c3f2ff6a68 100644 --- a/test/Transforms/InstCombine/phi.ll +++ b/test/Transforms/InstCombine/phi.ll @@ -402,3 +402,24 @@ if.else: ; preds = %entry store i32 %tmp5, i32* %res br label %if.end } + +; PR4413 +declare i32 @ext() +; CHECK: @test17 +define i32 @test17(i1 %a) { +entry: + br i1 %a, label %bb1, label %bb2 + +bb1: ; preds = %entry + %0 = tail call i32 @ext() ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %entry + %cond = phi i1 [ true, %bb1 ], [ false, %entry ] ; <i1> [#uses=1] +; CHECK-NOT: %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] + %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] ; <i32> [#uses=1] + %res = select i1 %cond, i32 %val, i32 0 ; <i32> [#uses=1] +; CHECK: ret i32 %cond + ret i32 %res +} + diff --git a/test/Transforms/InstCombine/shift-simplify.ll b/test/Transforms/InstCombine/shift-simplify.ll deleted file mode 100644 index e5cc705350f9..000000000000 --- a/test/Transforms/InstCombine/shift-simplify.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: egrep {shl|lshr|ashr} | count 3 - -define i32 @test0(i32 %A, i32 %B, i32 %C) { - %X = shl i32 %A, %C - %Y = shl i32 %B, %C - %Z = and i32 %X, %Y - ret i32 %Z -} - -define i32 @test1(i32 %A, i32 %B, i32 %C) { - %X = lshr i32 %A, %C - %Y = lshr i32 %B, %C - %Z = or i32 %X, %Y - ret i32 %Z -} - -define i32 @test2(i32 %A, i32 %B, i32 %C) { - %X = ashr i32 %A, %C - %Y = ashr i32 %B, %C - %Z = xor i32 %X, %Y - ret i32 %Z -} - -define i1 @test3(i32 %X) { - %tmp1 = shl i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test4(i32 %X) { - %tmp1 = lshr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test5(i32 %X) { - %tmp1 = ashr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - diff --git a/test/Transforms/InstCombine/shift-trunc-shift.ll b/test/Transforms/InstCombine/shift-trunc-shift.ll deleted file mode 100644 index 7133d299a2bd..000000000000 --- a/test/Transforms/InstCombine/shift-trunc-shift.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep lshr.*63 - -define i32 @t1(i64 %d18) { -entry: - %tmp916 = lshr i64 %d18, 32 ; <i64> [#uses=1] - %tmp917 = trunc i64 %tmp916 to i32 ; <i32> [#uses=1] - %tmp10 = lshr i32 %tmp917, 31 ; <i32> [#uses=1] - ret i32 %tmp10 -} - diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index feed37bd10ab..871e9fe070e7 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -130,8 +130,8 @@ define i8 @test13(i8 %A) { ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14(i32 %A) { ; CHECK: @test14 -; CHECK-NEXT: or i32 %A, 19744 -; CHECK-NEXT: and i32 +; CHECK-NEXT: %B = and i32 %A, -19760 +; CHECK-NEXT: or i32 %B, 19744 ; CHECK-NEXT: ret i32 %B = lshr i32 %A, 4 ; <i32> [#uses=1] %C = or i32 %B, 1234 ; <i32> [#uses=1] @@ -343,3 +343,101 @@ bb2: } +define i32 @test29(i64 %d18) { +entry: + %tmp916 = lshr i64 %d18, 32 + %tmp917 = trunc i64 %tmp916 to i32 + %tmp10 = lshr i32 %tmp917, 31 + ret i32 %tmp10 +; CHECK: @test29 +; CHECK: %tmp916 = lshr i64 %d18, 63 +; CHECK: %tmp10 = trunc i64 %tmp916 to i32 +} + + +define i32 @test30(i32 %A, i32 %B, i32 %C) { + %X = shl i32 %A, %C + %Y = shl i32 %B, %C + %Z = and i32 %X, %Y + ret i32 %Z +; CHECK: @test30 +; CHECK: %X1 = and i32 %A, %B +; CHECK: %Z = shl i32 %X1, %C +} + +define i32 @test31(i32 %A, i32 %B, i32 %C) { + %X = lshr i32 %A, %C + %Y = lshr i32 %B, %C + %Z = or i32 %X, %Y + ret i32 %Z +; CHECK: @test31 +; CHECK: %X1 = or i32 %A, %B +; CHECK: %Z = lshr i32 %X1, %C +} + +define i32 @test32(i32 %A, i32 %B, i32 %C) { + %X = ashr i32 %A, %C + %Y = ashr i32 %B, %C + %Z = xor i32 %X, %Y + ret i32 %Z +; CHECK: @test32 +; CHECK: %X1 = xor i32 %A, %B +; CHECK: %Z = ashr i32 %X1, %C +; CHECK: ret i32 %Z +} + +define i1 @test33(i32 %X) { + %tmp1 = shl i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test33 +; CHECK: %tmp1.mask = and i32 %X, 16777216 +; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0 +} + +define i1 @test34(i32 %X) { + %tmp1 = lshr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test34 +; CHECK: ret i1 false +} + +define i1 @test35(i32 %X) { + %tmp1 = ashr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test35 +; CHECK: %tmp2 = icmp slt i32 %X, 0 +; CHECK: ret i1 %tmp2 +} + +define i128 @test36(i128 %A, i128 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp23 = shl i128 %B, 64 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + ret i128 %tmp45 + +; CHECK: @test36 +; CHECK: %tmp231 = or i128 %B, %A +; CHECK: %ins = and i128 %tmp231, 18446744073709551615 +; CHECK: ret i128 %ins +} + +define i64 @test37(i128 %A, i32 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp22 = zext i32 %B to i128 + %tmp23 = shl i128 %tmp22, 96 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + %tmp46 = trunc i128 %tmp45 to i64 + ret i64 %tmp46 + +; CHECK: @test37 +; CHECK: %tmp23 = shl i128 %tmp22, 32 +; CHECK: %ins = or i128 %tmp23, %A +; CHECK: %tmp46 = trunc i128 %ins to i64 +} diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll new file mode 100644 index 000000000000..69e511bfb3bd --- /dev/null +++ b/test/Transforms/InstCombine/sqrt.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -instcombine %s | FileCheck %s + +define float @test1(float %x) nounwind readnone ssp { +entry: +; CHECK: @test1 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call double @sqrt(double %conv) readnone nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} + +declare double @sqrt(double) + +; PR8096 +define float @test2(float %x) nounwind readnone ssp { +entry: +; CHECK: @test2 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call double @sqrt(double %conv) nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll deleted file mode 100644 index 93e3753cf502..000000000000 --- a/test/Transforms/InstCombine/trunc-mask-ext.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt < %s -instcombine -S > %t -; RUN: not grep zext %t -; RUN: not grep sext %t - -; Instcombine should be able to eliminate all of these ext casts. - -declare void @use(i32) - -define i64 @foo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 15 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @bar(i64 %a) { - %b = trunc i64 %a to i32 - %c = shl i32 %b, 4 - %q = ashr i32 %c, 4 - %d = sext i32 %q to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @goo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @hoo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %x = xor i32 %c, 8 - %d = zext i32 %x to i64 - call void @use(i32 %b) - ret i64 %d -} diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll new file mode 100644 index 000000000000..f98bfd9236cd --- /dev/null +++ b/test/Transforms/InstCombine/trunc.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Instcombine should be able to eliminate all of these ext casts. + +declare void @use(i32) + +define i64 @test1(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 15 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test1 +; CHECK: %d = and i64 %a, 15 +; CHECK: ret i64 %d +} +define i64 @test2(i64 %a) { + %b = trunc i64 %a to i32 + %c = shl i32 %b, 4 + %q = ashr i32 %c, 4 + %d = sext i32 %q to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test2 +; CHECK: shl i64 %a, 36 +; CHECK: %d = ashr i64 {{.*}}, 36 +; CHECK: ret i64 %d +} +define i64 @test3(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test3 +; CHECK: %d = and i64 %a, 8 +; CHECK: ret i64 %d +} +define i64 @test4(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %x = xor i32 %c, 8 + %d = zext i32 %x to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test4 +; CHECK: = and i64 %a, 8 +; CHECK: %d = xor i64 {{.*}}, 8 +; CHECK: ret i64 %d +} + +define i32 @test5(i32 %A) { + %B = zext i32 %A to i128 + %C = lshr i128 %B, 16 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test5 +; CHECK: %C = lshr i32 %A, 16 +; CHECK: ret i32 %C +} + +define i32 @test6(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test6 +; CHECK: %C = lshr i64 %A, 32 +; CHECK: %D = trunc i64 %C to i32 +; CHECK: ret i32 %D +} + +define i92 @test7(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i92 + ret i92 %D +; CHECK: @test7 +; CHECK: %B = zext i64 %A to i92 +; CHECK: %C = lshr i92 %B, 32 +; CHECK: ret i92 %C +} + +define i64 @test8(i32 %A, i32 %B) { + %tmp38 = zext i32 %A to i128 + %tmp32 = zext i32 %B to i128 + %tmp33 = shl i128 %tmp32, 32 + %ins35 = or i128 %tmp33, %tmp38 + %tmp42 = trunc i128 %ins35 to i64 + ret i64 %tmp42 +; CHECK: @test8 +; CHECK: %tmp38 = zext i32 %A to i64 +; CHECK: %tmp32 = zext i32 %B to i64 +; CHECK: %tmp33 = shl i64 %tmp32, 32 +; CHECK: %ins35 = or i64 %tmp33, %tmp38 +; CHECK: ret i64 %ins35 +} + diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll index 7c2b4b01ca66..229f1a85e860 100644 --- a/test/Transforms/InstCombine/urem-simplify-bug.ll +++ b/test/Transforms/InstCombine/urem-simplify-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5 } +; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5} @.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1] @.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1] diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll new file mode 100644 index 000000000000..17a0aba2faef --- /dev/null +++ b/test/Transforms/JumpThreading/2010-08-26-and.ll @@ -0,0 +1,162 @@ +; RUN: opt -jump-threading -enable-jump-threading-lvi -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%class.StringSwitch = type { i8*, i32, i32, i8 } + +@.str = private constant [4 x i8] c"red\00" ; <[4 x i8]*> [#uses=1] +@.str1 = private constant [7 x i8] c"orange\00" ; <[7 x i8]*> [#uses=1] +@.str2 = private constant [7 x i8] c"yellow\00" ; <[7 x i8]*> [#uses=1] +@.str3 = private constant [6 x i8] c"green\00" ; <[6 x i8]*> [#uses=1] +@.str4 = private constant [5 x i8] c"blue\00" ; <[5 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c"indigo\00" ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [7 x i8] c"violet\00" ; <[7 x i8]*> [#uses=1] +@.str7 = private constant [12 x i8] c"Color = %d\0A\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp142 = icmp sgt i32 %argc, 1 ; <i1> [#uses=1] + br i1 %cmp142, label %bb.nph, label %for.end + +bb.nph: ; preds = %entry + %tmp = add i32 %argc, -2 ; <i32> [#uses=1] + %tmp144 = zext i32 %tmp to i64 ; <i64> [#uses=1] + %tmp145 = add i64 %tmp144, 1 ; <i64> [#uses=1] + br label %land.lhs.true.i + +land.lhs.true.i: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %bb.nph + %retval.0.i.pre161 = phi i32 [ undef, %bb.nph ], [ %retval.0.i.pre, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i32> [#uses=3] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp146, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i64> [#uses=1] + %tmp146 = add i64 %indvar, 1 ; <i64> [#uses=3] + %arrayidx = getelementptr i8** %argv, i64 %tmp146 ; <i8**> [#uses=1] + %tmp6 = load i8** %arrayidx, align 8 ; <i8*> [#uses=8] + %call.i.i = call i64 @strlen(i8* %tmp6) nounwind ; <i64> [#uses=1] + %conv.i.i = trunc i64 %call.i.i to i32 ; <i32> [#uses=6]\ +; CHECK: switch i32 %conv.i.i +; CHECK-NOT: if.then.i40 +; CHECK: } + switch i32 %conv.i.i, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit [ + i32 3, label %land.lhs.true5.i + i32 6, label %land.lhs.true5.i37 + ] + +land.lhs.true5.i: ; preds = %land.lhs.true.i + %call.i = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* %tmp6, i64 4) nounwind ; <i32> [#uses=1] + %cmp9.i = icmp eq i32 %call.i, 0 ; <i1> [#uses=1] + br i1 %cmp9.i, label %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit: ; preds = %land.lhs.true5.i + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i37: ; preds = %land.lhs.true.i + %call.i35 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str1, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i36 = icmp eq i32 %call.i35, 0 ; <i1> [#uses=1] + br i1 %cmp9.i36, label %if.then.i40, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +if.then.i40: ; preds = %land.lhs.true5.i37 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i40, %land.lhs.true5.i37, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, %land.lhs.true5.i, %land.lhs.true.i + %retval.0.i.pre159 = phi i32 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre161, %land.lhs.true5.i37 ], [ 2, %if.then.i40 ], [ %retval.0.i.pre161, %land.lhs.true5.i ], [ %retval.0.i.pre161, %land.lhs.true.i ] ; <i32> [#uses=2] + %tmp2.i44 = phi i8 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ 0, %land.lhs.true5.i37 ], [ 1, %if.then.i40 ], [ 0, %land.lhs.true5.i ], [ 0, %land.lhs.true.i ] ; <i8> [#uses=3] + %tobool.i46 = icmp eq i8 %tmp2.i44, 0 ; <i1> [#uses=1] + %cmp.i49 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.i46, %cmp.i49 ; <i1> [#uses=1] + br i1 %or.cond, label %land.lhs.true5.i55, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +land.lhs.true5.i55: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %call.i53 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i54 = icmp eq i32 %call.i53, 0 ; <i1> [#uses=1] + br i1 %cmp9.i54, label %if.then.i58, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +if.then.i58: ; preds = %land.lhs.true5.i55 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60: ; preds = %if.then.i58, %land.lhs.true5.i55, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre158 = phi i32 [ %retval.0.i.pre159, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre159, %land.lhs.true5.i55 ], [ 3, %if.then.i58 ] ; <i32> [#uses=2] + %tmp2.i63 = phi i8 [ %tmp2.i44, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i44, %land.lhs.true5.i55 ], [ 1, %if.then.i58 ] ; <i8> [#uses=3] + %tmp14.i64 = and i8 %tmp2.i63, 1 ; <i8> [#uses=1] + %tobool.i65 = icmp eq i8 %tmp14.i64, 0 ; <i1> [#uses=1] + %cmp.i68 = icmp eq i32 %conv.i.i, 5 ; <i1> [#uses=1] + %or.cond168 = and i1 %tobool.i65, %cmp.i68 ; <i1> [#uses=1] + br i1 %or.cond168, label %land.lhs.true5.i74, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i74: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %call.i72 = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i8* %tmp6, i64 6) nounwind ; <i32> [#uses=1] + %cmp9.i73 = icmp eq i32 %call.i72, 0 ; <i1> [#uses=1] + br i1 %cmp9.i73, label %if.then.i77, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +if.then.i77: ; preds = %land.lhs.true5.i74 + br label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i77, %land.lhs.true5.i74, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %retval.0.i.pre157 = phi i32 [ %retval.0.i.pre158, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %retval.0.i.pre158, %land.lhs.true5.i74 ], [ 4, %if.then.i77 ] ; <i32> [#uses=2] + %tmp2.i81 = phi i8 [ %tmp2.i63, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %tmp2.i63, %land.lhs.true5.i74 ], [ 1, %if.then.i77 ] ; <i8> [#uses=3] + %tmp14.i82 = and i8 %tmp2.i81, 1 ; <i8> [#uses=1] + %tobool.i83 = icmp eq i8 %tmp14.i82, 0 ; <i1> [#uses=1] + %cmp.i86 = icmp eq i32 %conv.i.i, 4 ; <i1> [#uses=1] + %or.cond169 = and i1 %tobool.i83, %cmp.i86 ; <i1> [#uses=1] + br i1 %or.cond169, label %land.lhs.true5.i92, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i92: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %call.i90 = call i32 @memcmp(i8* getelementptr inbounds ([5 x i8]* @.str4, i64 0, i64 0), i8* %tmp6, i64 5) nounwind ; <i32> [#uses=1] + %cmp9.i91 = icmp eq i32 %call.i90, 0 ; <i1> [#uses=1] + br i1 %cmp9.i91, label %if.then.i95, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +if.then.i95: ; preds = %land.lhs.true5.i92 + br label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i95, %land.lhs.true5.i92, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre156 = phi i32 [ %retval.0.i.pre157, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre157, %land.lhs.true5.i92 ], [ 5, %if.then.i95 ] ; <i32> [#uses=2] + %tmp2.i99 = phi i8 [ %tmp2.i81, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i81, %land.lhs.true5.i92 ], [ 1, %if.then.i95 ] ; <i8> [#uses=3] + %tmp14.i100 = and i8 %tmp2.i99, 1 ; <i8> [#uses=1] + %tobool.i101 = icmp eq i8 %tmp14.i100, 0 ; <i1> [#uses=1] + %cmp.i104 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond170 = and i1 %tobool.i101, %cmp.i104 ; <i1> [#uses=1] + br i1 %or.cond170, label %land.lhs.true5.i110, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +land.lhs.true5.i110: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %call.i108 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i109 = icmp eq i32 %call.i108, 0 ; <i1> [#uses=1] + br i1 %cmp9.i109, label %if.then.i113, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +if.then.i113: ; preds = %land.lhs.true5.i110 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115: ; preds = %if.then.i113, %land.lhs.true5.i110, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre155 = phi i32 [ %retval.0.i.pre156, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre156, %land.lhs.true5.i110 ], [ 6, %if.then.i113 ] ; <i32> [#uses=2] + %tmp2.i118 = phi i8 [ %tmp2.i99, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i99, %land.lhs.true5.i110 ], [ 1, %if.then.i113 ] ; <i8> [#uses=3] + %tmp14.i119 = and i8 %tmp2.i118, 1 ; <i8> [#uses=1] + %tobool.i120 = icmp eq i8 %tmp14.i119, 0 ; <i1> [#uses=1] + %cmp.i123 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond171 = and i1 %tobool.i120, %cmp.i123 ; <i1> [#uses=1] + br i1 %or.cond171, label %land.lhs.true5.i129, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +land.lhs.true5.i129: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %call.i127 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str6, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i128 = icmp eq i32 %call.i127, 0 ; <i1> [#uses=1] + br i1 %cmp9.i128, label %if.then.i132, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +if.then.i132: ; preds = %land.lhs.true5.i129 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134: ; preds = %if.then.i132, %land.lhs.true5.i129, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %retval.0.i.pre = phi i32 [ %retval.0.i.pre155, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %retval.0.i.pre155, %land.lhs.true5.i129 ], [ 7, %if.then.i132 ] ; <i32> [#uses=2] + %tmp2.i137 = phi i8 [ %tmp2.i118, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %tmp2.i118, %land.lhs.true5.i129 ], [ 1, %if.then.i132 ] ; <i8> [#uses=1] + %tmp7.i138 = and i8 %tmp2.i137, 1 ; <i8> [#uses=1] + %tobool.i139 = icmp eq i8 %tmp7.i138, 0 ; <i1> [#uses=1] + %retval.0.i = select i1 %tobool.i139, i32 0, i32 %retval.0.i.pre ; <i32> [#uses=1] + %call22 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0] + %exitcond = icmp eq i64 %tmp146, %tmp145 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %land.lhs.true.i + +for.end: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %entry + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) nounwind readonly + +declare i64 @strlen(i8* nocapture) nounwind readonly diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll index 503d301892ee..cd274e78c9fc 100644 --- a/test/Transforms/JumpThreading/basic.ll +++ b/test/Transforms/JumpThreading/basic.ll @@ -147,11 +147,17 @@ define i32 @test6(i32 %A) { ; CHECK: @test6 %tmp455 = icmp eq i32 %A, 42 br i1 %tmp455, label %BB1, label %BB2 - -BB2: + +; CHECK: call i32 @f2() +; CHECK-NEXT: ret i32 3 + ; CHECK: call i32 @f1() -; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 4 +; CHECK-NOT: br +; CHECK: call void @f3() +; CHECK-NOT: br +; CHECK: ret i32 4 + +BB2: call i32 @f1() br label %BB1 @@ -415,4 +421,58 @@ F2: ; CHECK-NEXT: br i1 %N, label %T2, label %F2 } +; CHECK: @test14 +define i32 @test14(i32 %in) { +entry: + %A = icmp eq i32 %in, 0 +; CHECK: br i1 %A, label %right_ret, label %merge + br i1 %A, label %left, label %right + +; CHECK-NOT: left: +left: + br label %merge + +; CHECK-NOT: right: +right: + %B = call i32 @f1() + br label %merge + +merge: +; CHECK-NOT: %C = phi i32 [%in, %left], [%B, %right] + %C = phi i32 [%in, %left], [%B, %right] + %D = add i32 %C, 1 + %E = icmp eq i32 %D, 2 + br i1 %E, label %left_ret, label %right_ret + +; CHECK: left_ret: +left_ret: + ret i32 0 + +right_ret: + ret i32 1 +} + +; PR5652 +; CHECK: @test15 +define i32 @test15(i32 %len) { +entry: +; CHECK: icmp ult i32 %len, 13 + %tmp = icmp ult i32 %len, 13 + br i1 %tmp, label %check, label %exit0 + +exit0: + ret i32 0 + +check: + %tmp9 = icmp ult i32 %len, 21 + br i1 %tmp9, label %exit1, label %exit2 + +exit2: +; CHECK-NOT: ret i32 2 + ret i32 2 + +exit1: + ret i32 1 +; CHECK: } +} diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll index f0fc61e7370f..751bc6518a1a 100644 --- a/test/Transforms/JumpThreading/crash.ll +++ b/test/Transforms/JumpThreading/crash.ll @@ -216,6 +216,9 @@ bb61: ; PR5698 define void @test7(i32 %x) { +entry: + br label %tailrecurse + tailrecurse: switch i32 %x, label %return [ i32 2, label %bb2 @@ -433,4 +436,51 @@ for.cond1040: ; preds = %for.body1044, %for. ret void } +; PR7755 +define void @test16(i1 %c, i1 %c2, i1 %c3, i1 %c4) nounwind ssp { +entry: + %cmp = icmp sgt i32 undef, 1 ; <i1> [#uses=1] + br i1 %c, label %land.end, label %land.rhs + +land.rhs: ; preds = %entry + br i1 %c2, label %lor.lhs.false.i, label %land.end + +lor.lhs.false.i: ; preds = %land.rhs + br i1 %c3, label %land.end, label %land.end + +land.end: + %0 = phi i1 [ true, %entry ], [ false, %land.rhs ], [false, %lor.lhs.false.i], [false, %lor.lhs.false.i] ; <i1> [#uses=1] + %cmp12 = and i1 %cmp, %0 + %xor1 = xor i1 %cmp12, %c4 + br i1 %xor1, label %if.then, label %if.end + +if.then: + ret void + +if.end: + ret void +} + +define void @test17() { +entry: + br i1 undef, label %bb269.us.us, label %bb269.us.us.us + +bb269.us.us.us: + %indvar = phi i64 [ %indvar.next, %bb287.us.us.us ], [ 0, %entry ] + %0 = icmp eq i16 undef, 0 + br i1 %0, label %bb287.us.us.us, label %bb286.us.us.us + +bb287.us.us.us: + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 4 + br i1 %exitcond, label %bb288.bb289.loopexit_crit_edge, label %bb269.us.us.us +bb286.us.us.us: + unreachable + +bb269.us.us: + unreachable + +bb288.bb289.loopexit_crit_edge: + unreachable +} diff --git a/test/Transforms/JumpThreading/lvi-load.ll b/test/Transforms/JumpThreading/lvi-load.ll new file mode 100644 index 000000000000..0bf4187d544b --- /dev/null +++ b/test/Transforms/JumpThreading/lvi-load.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -jump-threading -enable-jump-threading-lvi -dce < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.4" + +%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* } +%"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" = type { i64 } +%"struct.llvm::Type" = type opaque +%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" } +%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* } +%"struct.llvm::ValueName" = type opaque + +@_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__ = internal constant [5 x i8] c"cast\00", align 8 ; <[5 x i8]*> [#uses=1] +@.str = private constant [31 x i8] c"include/llvm/Support/Casting.h\00", align 8 ; <[31 x i8]*> [#uses=1] +@.str1 = private constant [59 x i8] c"isa<X>(Val) && \22cast<Ty>() argument of incompatible type!\22\00", align 8 ; <[59 x i8]*> [#uses=1] + +; CHECK: Z3fooPN4llvm5ValueE +define zeroext i8 @_Z3fooPN4llvm5ValueE(%"struct.llvm::Value"* %V) ssp { +entry: + %0 = getelementptr inbounds %"struct.llvm::Value"* %V, i64 0, i32 1 ; <i8*> [#uses=1] + %1 = load i8* %0, align 8 ; <i8> [#uses=2] + %2 = icmp ugt i8 %1, 20 ; <i1> [#uses=1] + br i1 %2, label %bb.i, label %bb2 + +bb.i: ; preds = %entry + %toBoolnot.i.i = icmp ult i8 %1, 21 ; <i1> [#uses=1] + br i1 %toBoolnot.i.i, label %bb6.i.i, label %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + +; CHECK-NOT: assert +bb6.i.i: ; preds = %bb.i + tail call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8]* @_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str, i64 0, i64 0), i32 202, i8* getelementptr inbounds ([59 x i8]* @.str1, i64 0, i64 0)) noreturn + unreachable + +_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %bb.i +; CHECK-NOT: null + %3 = icmp eq %"struct.llvm::Value"* %V, null ; <i1> [#uses=1] + br i1 %3, label %bb2, label %bb + +bb: ; preds = %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + tail call void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"* %V) +; CHECK: ret + ret i8 1 + +bb2: ; preds = %entry, %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + ret i8 0 +} + +declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn + +declare void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"*) diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll index 7545641f1aee..5381c88aea63 100644 --- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll +++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry} -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry} %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* } diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll new file mode 100644 index 000000000000..88be5c41ccc5 --- /dev/null +++ b/test/Transforms/LICM/crash.ll @@ -0,0 +1,61 @@ +; RUN: opt -licm %s -disable-output + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + + +; PR8068 +@g_12 = external global i8, align 1 +define void @test1() nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.cond, %bb.nph + store i8 0, i8* @g_12, align 1 + %tmp6 = load i8* @g_12, align 1 + br label %for.cond + +for.cond: ; preds = %for.body + store i8 %tmp6, i8* @g_12, align 1 + br i1 false, label %for.cond.for.end10_crit_edge, label %for.body + +for.cond.for.end10_crit_edge: ; preds = %for.cond + br label %for.end10 + +for.end10: ; preds = %for.cond.for.end10_crit_edge, %entry + ret void +} + +; PR8067 +@g_8 = external global i32, align 4 + +define void @test2() noreturn nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %tmp7 = load i32* @g_8, align 4 + store i32* @g_8, i32** undef, align 16 + store i32 undef, i32* @g_8, align 4 + br label %for.body +} + +; PR8102 +define void @test3() { +entry: + %__first = alloca { i32* } + br i1 undef, label %for.cond, label %for.end + +for.cond: ; preds = %for.cond, %entry + %tmp1 = getelementptr { i32*}* %__first, i32 0, i32 0 + %tmp2 = load i32** %tmp1, align 4 + %call = tail call i32* @test3helper(i32* %tmp2) + %tmp3 = getelementptr { i32*}* %__first, i32 0, i32 0 + store i32* %call, i32** %tmp3, align 4 + br i1 false, label %for.cond, label %for.end + +for.end: ; preds = %for.cond, %entry + ret void +} + +declare i32* @test3helper(i32*) diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index e7d36afb91b1..6f28d53af66e 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -48,3 +48,19 @@ Out: ; preds = %Loop %C = sub i32 %A, %B ; <i32> [#uses=1] ret i32 %C } + + +; This loop invariant instruction should be constant folded, not hoisted. +define i32 @test3(i1 %c) { +; CHECK: define i32 @test3 +; CHECK: call void @foo2(i32 6) + %A = load i32* @X ; <i32> [#uses=2] + br label %Loop +Loop: + %B = add i32 4, 2 ; <i32> [#uses=2] + call void @foo2( i32 %B ) + br i1 %c, label %Loop, label %Out +Out: ; preds = %Loop + %C = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %C +} diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index ef28c38ca607..c1d2b24b0bba 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -licm -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + @X = global i32 7 ; <i32*> [#uses=4] define void @test1(i32 %i) { @@ -32,23 +34,21 @@ Entry: br label %Loop ; CHECK: @test2 ; CHECK: Entry: -; CHECK-NEXT: %X1 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X2 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X1.promoted = load i32* %X1 +; CHECK-NEXT: %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 - %X1 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] %A = load i32* %X1 ; <i32> [#uses=1] %V = add i32 %A, 1 ; <i32> [#uses=1] - %X2 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] store i32 %V, i32* %X2 br i1 false, label %Loop, label %Exit Exit: ; preds = %Loop ret void ; CHECK: Exit: -; CHECK-NEXT: store i32 %V, i32* %X1 +; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: ret void } @@ -71,3 +71,50 @@ Out: ; preds = %Loop ret void } +; PR8041 +define void @test4(i8* %x, i8 %n) { +; CHECK: @test4 + %handle1 = alloca i8* + %handle2 = alloca i8* + store i8* %x, i8** %handle1 + br label %loop + +loop: + %tmp = getelementptr i8* %x, i64 8 + store i8* %tmp, i8** %handle2 + br label %subloop + +subloop: + %count = phi i8 [ 0, %loop ], [ %nextcount, %subloop ] + %offsetx2 = load i8** %handle2 + store i8 %n, i8* %offsetx2 + %newoffsetx2 = getelementptr i8* %offsetx2, i64 -1 + store i8* %newoffsetx2, i8** %handle2 + %nextcount = add i8 %count, 1 + %innerexitcond = icmp sge i8 %nextcount, 8 + br i1 %innerexitcond, label %innerexit, label %subloop + +; Should have promoted 'handle2' accesses. +; CHECK: subloop: +; CHECK-NEXT: phi i8* [ +; CHECK-NEXT: %count = phi i8 [ +; CHECK-NEXT: store i8 %n +; CHECK-NOT: store +; CHECK: br i1 + +innerexit: + %offsetx1 = load i8** %handle1 + %val = load i8* %offsetx1 + %cond = icmp eq i8 %val, %n + br i1 %cond, label %exit, label %loop + +; Should not have promoted offsetx1 loads. +; CHECK: innerexit: +; CHECK: %val = load i8* %offsetx1 +; CHECK: %cond = icmp eq i8 %val, %n +; CHECK: br i1 %cond, label %exit, label %loop + +exit: + ret void +} + diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index 11112eb74443..68e4b64bf9bf 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -233,3 +233,17 @@ Out: ; preds = %Loop ; CHECK-NEXT: ret i32 %tmp.6 } +; Should delete, not sink, dead instructions. +define void @test11() { + br label %Loop +Loop: + %dead = getelementptr %Ty* @X2, i64 0, i32 0 + br i1 false, label %Loop, label %Out +Out: + ret void +; CHECK: @test11 +; CHECK: Out: +; CHECK-NEXT: ret void +} + + diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll index 9a64e2a9a830..5403e723ee15 100644 --- a/test/Transforms/LoopRotate/phi-duplicate.ll +++ b/test/Transforms/LoopRotate/phi-duplicate.ll @@ -27,9 +27,21 @@ for.body: ; preds = %for.cond for.end: ; preds = %for.cond ret void } -; Should only end up with one phi. -; CHECK: for.body: -; CHECK-NEXT: %j.02 = phi i64 -; CHECK-NOT: phi -; CHECK: ret void +; Should only end up with one phi. Also, the original for.cond block should +; be moved to the end of the loop so that the new loop header pleasantly +; ends up at the top. + +; CHECK: define void @test +; CHECK-NEXT: entry: +; CHECK-NEXT: icmp slt i64 +; CHECK-NEXT: br i1 +; CHECK-NOT: : +; CHECK: bb.nph: +; CHECK-NEXT: br label %for.body +; CHECK-NOT: : +; CHECK: for.body: +; CHECK-NEXT: %j.02 = phi i64 +; CHECK-NOT: phi +; CHECK: ret void +; CHECK-NEXT: } diff --git a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll new file mode 100644 index 000000000000..2a1ee7d1a72f --- /dev/null +++ b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -domfrontier -loopsimplify -domfrontier -verify-dom-info -analyze + + +define void @a() nounwind { +entry: + br i1 undef, label %bb37, label %bb1.i + +bb1.i: ; preds = %bb1.i, %bb + %indvar = phi i64 [ %indvar.next, %bb1.i ], [ 0, %entry ] ; <i64> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 576 ; <i1> [#uses=1] + br i1 %exitcond, label %bb37, label %bb1.i + +bb37: ; preds = %bb1.i, %bb + br label %return + + +return: ; preds = %bb39 + ret void +} diff --git a/test/Transforms/LoopSimplify/indirectbr-backedge.ll b/test/Transforms/LoopSimplify/indirectbr-backedge.ll new file mode 100644 index 000000000000..ca6e47fcecd3 --- /dev/null +++ b/test/Transforms/LoopSimplify/indirectbr-backedge.ll @@ -0,0 +1,35 @@ +; RUN: opt -loopsimplify -S < %s | FileCheck %s + +; LoopSimplify shouldn't split loop backedges that use indirectbr. + +; CHECK: bb1: ; preds = %bb5, %bb +; CHECK-NEXT: indirectbr + +; CHECK: bb5: ; preds = %bb1 +; CHECK-NEXT: br label %bb1{{$}} + +define void @foo(i8* %p) nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb1, %bb + indirectbr i8* %p, [label %bb6, label %bb7, label %bb1, label %bb2, label %bb3, label %bb5, label %bb4] + +bb2: ; preds = %bb1 + ret void + +bb3: ; preds = %bb1 + ret void + +bb4: ; preds = %bb1 + ret void + +bb5: ; preds = %bb1 + br label %bb1 + +bb6: ; preds = %bb1 + ret void + +bb7: ; preds = %bb1 + ret void +} diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll new file mode 100644 index 000000000000..017a0d210849 --- /dev/null +++ b/test/Transforms/LoopSimplify/preserve-scev.ll @@ -0,0 +1,50 @@ +; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>} +; PR8079 + +; LoopSimplify should invalidate indvars when splitting out the +; inner loop. + +@maxStat = external global i32 + +define i32 @test() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.then5, %if.end, %entry + %cuts.1 = phi i32 [ 0, %entry ], [ %inc, %if.then5 ], [ %cuts.1, %if.end ] + %0 = phi i32 [ 0, %entry ], [ %add, %if.end ], [ %add, %if.then5 ] + %add = add i32 %0, 1 + %cmp = icmp slt i32 %0, 1 + %tmp1 = load i32* @maxStat, align 4 + br i1 %cmp, label %for.body, label %for.cond14.preheader + +for.cond14.preheader: ; preds = %for.cond + %cmp1726 = icmp sgt i32 %tmp1, 0 + br i1 %cmp1726, label %for.body18, label %return + +for.body: ; preds = %for.cond + %cmp2 = icmp sgt i32 %tmp1, 100 + br i1 %cmp2, label %return, label %if.end + +if.end: ; preds = %for.body + %cmp4 = icmp sgt i32 %tmp1, -1 + br i1 %cmp4, label %if.then5, label %for.cond + +if.then5: ; preds = %if.end + call void @foo() nounwind + %inc = add i32 %cuts.1, 1 + br label %for.cond + +for.body18: ; preds = %for.body18, %for.cond14.preheader + %i13.027 = phi i32 [ %1, %for.body18 ], [ 0, %for.cond14.preheader ] + call void @foo() nounwind + %1 = add nsw i32 %i13.027, 1 + %tmp16 = load i32* @maxStat, align 4 + %cmp17 = icmp slt i32 %1, %tmp16 + br i1 %cmp17, label %for.body18, label %return + +return: ; preds = %for.body18, %for.body, %for.cond14.preheader + ret i32 0 +} + +declare void @foo() nounwind diff --git a/test/Transforms/LoopStrengthReduce/pr3571.ll b/test/Transforms/LoopStrengthReduce/pr3571.ll index 9ad27d5ff114..a23e4db49705 100644 --- a/test/Transforms/LoopStrengthReduce/pr3571.ll +++ b/test/Transforms/LoopStrengthReduce/pr3571.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-reduce | llvm-dis ; PR3571 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define void @_ZNK18qdesigner_internal10TreeWidget12drawBranchesEP8QPainterRK5QRectRK11QModelIndex() nounwind { entry: br label %_ZNK11QModelIndex7isValidEv.exit.i diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll index dca97e9ad187..8af5cf1dfd72 100644 --- a/test/Transforms/LoopStrengthReduce/uglygep.ll +++ b/test/Transforms/LoopStrengthReduce/uglygep.ll @@ -4,7 +4,6 @@ ; should be able to form pretty GEPs. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" define void @Z4() nounwind { bb: diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll new file mode 100644 index 000000000000..73391ca8d19d --- /dev/null +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -0,0 +1,53 @@ +; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s +; PR5373 + +; Loop unswitching shouldn't trivially unswitch the true case of condition %a +; in the code here because it leads to an infinite loop. While this doesn't +; contain any instructions with side effects, it's still a kind of side effect. +; It can trivially unswitch on the false cas of condition %a though. + +; STATS: 2 loop-unswitch - Number of branches unswitched +; STATS: 1 loop-unswitch - Number of unswitches that are trivial + +; CHECK: @func_16 +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split + +; CHECK: entry.split: +; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1 + +; CHECK: cond.end.us: +; CHECK-NEXT: br label %cond.end.us + +; CHECK: abort0.split: +; CHECK-NEXT: call void @end0() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: abort1: +; CHECK-NEXT: call void @end1() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: } + +define void @func_16(i1 %a, i1 %b) nounwind { +entry: + br label %for.body + +for.body: + br i1 %a, label %cond.end, label %abort0 + +cond.end: + br i1 %b, label %for.body, label %abort1 + +abort0: + call void @end0() noreturn nounwind + unreachable + +abort1: + call void @end1() noreturn nounwind + unreachable +} + +declare void @end0() noreturn +declare void @end1() noreturn diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll new file mode 100644 index 000000000000..5b110d6b7eba --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-load.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.load.add.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.min.i8.p0i8(i8* %ptr, i8 %delta) + +define i8 @add() { +; CHECK: @add + %i = alloca i8 + %j = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: add +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @nand() { +; CHECK: @nand + %i = alloca i8 + %j = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: and +; CHECK-NEXT: xor +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @min() { +; CHECK: @min + %i = alloca i8 + %j = call i8 @llvm.atomic.load.min.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll new file mode 100644 index 000000000000..0a59c8595e6a --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-swap.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %ptr, i8 %cmp, i8 %val) +declare i8 @llvm.atomic.swap.i8.p0i8(i8* %ptr, i8 %val) + +define i8 @cmpswap() { +; CHECK: @cmpswap + %i = alloca i8 + %j = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %i, i8 0, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @swap() { +; CHECK: @swap + %i = alloca i8 + %j = call i8 @llvm.atomic.swap.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/barrier.ll b/test/Transforms/LowerAtomic/barrier.ll new file mode 100644 index 000000000000..218c5ba8d18e --- /dev/null +++ b/test/Transforms/LowerAtomic/barrier.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare void @llvm.memory.barrier(i1 %ll, i1 %ls, i1 %sl, i1 %ss, i1 %device) + +define void @barrier() { +; CHECK: @barrier + call void @llvm.memory.barrier(i1 0, i1 0, i1 0, i1 0, i1 0) +; CHECK-NEXT: ret + ret void +} diff --git a/test/Transforms/SSI/dg.exp b/test/Transforms/LowerAtomic/dg.exp index f2005891a59a..f2005891a59a 100644 --- a/test/Transforms/SSI/dg.exp +++ b/test/Transforms/LowerAtomic/dg.exp diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll new file mode 100644 index 000000000000..dc64a0858ba8 --- /dev/null +++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll @@ -0,0 +1,18 @@ +; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged +; This used to crash with an assert. + +define <2 x i8> @v1(<2 x i8> %x) { + ret <2 x i8> %x +} + +define <4 x i8> @v2(<4 x i8> %x) { + ret <4 x i8> %x +} + +define [2 x i8] @a1([2 x i8] %x) { + ret [2 x i8] %x +} + +define [4 x i8] @a2([4 x i8] %x) { + ret [4 x i8] %x +} diff --git a/test/Transforms/PartialSpecialize/two-specializations.ll b/test/Transforms/PartialSpecialize/two-specializations.ll index c85ddb78dd1a..bc3da22e1855 100644 --- a/test/Transforms/PartialSpecialize/two-specializations.ll +++ b/test/Transforms/PartialSpecialize/two-specializations.ll @@ -1,8 +1,8 @@ ; If there are two specializations of a function, make sure each callsite ; calls the right one. ; -; RN: opt -S -partialspecialization %s | FileCheck %s -; RUN: true +; RUN: opt -S -partialspecialization -disable-inlining %s | opt -S -inline | FileCheck %s -check-prefix=CORRECT +; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s declare void @callback1() declare void @callback2() @@ -14,14 +14,18 @@ define internal void @UseCallback(void()* %pCallback) { define void @foo(void()* %pNonConstCallback) { Entry: +; CORRECT: Entry +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void %pNonConstCallback() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void @callback2() ; CHECK: Entry -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; CHECK-NEXT: call void %pNonConstCallback() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; CHECK-NEXT: call void @callback2() +; CHECK-NOT: call void @UseCallback(void ()* @callback1) +; CHECK-NOT: call void @UseCallback(void ()* @callback2) +; CHECK: ret void call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback2) diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll new file mode 100644 index 000000000000..c6572fa5d141 --- /dev/null +++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll @@ -0,0 +1,28 @@ +; RUN: opt %s -ipsccp -S | FileCheck %s +; PR7876 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define internal i32 @foo() nounwind noinline ssp { +entry: + ret i32 0 +; CHECK: @foo +; CHECK: entry: +; CHECK: ret i32 0 +} + +declare i32 @bar() + +define internal i32 @test(i32 %c) nounwind noinline ssp { +bb: + %tmp1 = icmp ne i32 %c, 0 ; <i1> [#uses=1] + %tmp2 = select i1 %tmp1, i32 ()* @foo, i32 ()* @bar ; <i32 ()*> [#uses=1] + %tmp3 = tail call i32 %tmp2() nounwind ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @main() nounwind ssp { +bb: + %tmp = tail call i32 @test(i32 1) ; <i32> [#uses=1] + ret i32 %tmp +} diff --git a/test/Transforms/SSI/2009-07-09-Invoke.ll b/test/Transforms/SSI/2009-07-09-Invoke.ll deleted file mode 100644 index 20a22172806e..000000000000 --- a/test/Transforms/SSI/2009-07-09-Invoke.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output -; PR4511 - - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" } - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } - %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } - -declare void @_Unwind_Resume(i8*) - -declare fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*) - -define fastcc void @_ZNSt6vectorISsSaISsEE9push_backERKSs(%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >"* nocapture %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* nocapture %__x) { -entry: - br i1 undef, label %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i, label %bb - -bb: ; preds = %entry - ret void - -_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i: ; preds = %entry - %0 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef) - to label %invcont14.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=3] - -invcont14.i: ; preds = %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %1 = icmp eq %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, null ; <i1> [#uses=1] - br i1 %1, label %bb19.i, label %bb.i17.i - -bb.i17.i: ; preds = %invcont14.i - %2 = invoke fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* undef, i32 0) - to label %bb2.i25.i unwind label %ppad.i.i.i23.i ; <i8*> [#uses=0] - -ppad.i.i.i23.i: ; preds = %bb.i17.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i24.i unwind label %lpad.i29.i - -.noexc.i24.i: ; preds = %ppad.i.i.i23.i - unreachable - -bb2.i25.i: ; preds = %bb.i17.i - unreachable - -lpad.i29.i: ; preds = %ppad.i.i.i23.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i9 unwind label %ppad81.i - -.noexc.i9: ; preds = %lpad.i29.i - unreachable - -bb19.i: ; preds = %invcont14.i - %3 = getelementptr %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, i32 1 ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=2] - %4 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %3) - to label %invcont20.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - -invcont20.i: ; preds = %bb19.i - unreachable - -invcont32.i: ; preds = %ppad81.i - unreachable - -ppad81.i: ; preds = %bb19.i, %lpad.i29.i, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %__new_finish.0.i = phi %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* [ %0, %lpad.i29.i ], [ undef, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i ], [ %3, %bb19.i ] ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - br i1 undef, label %invcont32.i, label %bb.i.i.i.i - -bb.i.i.i.i: ; preds = %bb.i.i.i.i, %ppad81.i - br label %bb.i.i.i.i -} - -declare fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* nocapture, i32) diff --git a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll deleted file mode 100644 index 0fe37ec74098..000000000000 --- a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -declare fastcc i32 @ras_Empty(i8** nocapture) nounwind readonly - -define i32 @cc_Tautology() nounwind { -entry: - unreachable - -cc_InitData.exit: ; No predecessors! - %0 = call fastcc i32 @ras_Empty(i8** undef) nounwind ; <i32> [#uses=1] - %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] - br i1 %1, label %bb2, label %bb6 - -bb2: ; preds = %cc_InitData.exit - unreachable - -bb6: ; preds = %cc_InitData.exit - ret i32 undef -} diff --git a/test/Transforms/SSI/2009-08-17-CritEdge.ll b/test/Transforms/SSI/2009-08-17-CritEdge.ll deleted file mode 100644 index 61bd2dc693f4..000000000000 --- a/test/Transforms/SSI/2009-08-17-CritEdge.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @test(i32 %x) { -entry: - br label %label1 -label1: - %A = phi i32 [ 0, %entry ], [ %A.1, %label2 ] - %B = icmp slt i32 %A, %x - br i1 %B, label %label2, label %label2 -label2: - %A.1 = add i32 %A, 1 - br label %label1 -label3: ; No predecessors! - ret void -} diff --git a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll deleted file mode 100644 index 64bed191def0..000000000000 --- a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @foo() { -entry: - %tmp0 = load i64* undef, align 4 ; <i64> [#uses=3] - br i1 undef, label %end_stmt_playback, label %bb16 - -readJournalHdr.exit: ; No predecessors! - br label %end_stmt_playback - -bb16: ; preds = %bb7 - %tmp1 = icmp slt i64 0, %tmp0 ; <i1> [#uses=1] - br i1 %tmp1, label %bb16, label %bb17 - -bb17: ; preds = %bb16 - store i64 %tmp0, i64* undef, align 4 - br label %end_stmt_playback - -end_stmt_playback: ; preds = %bb17, %readJournalHdr.exit, %bb6, %bb2 - store i64 %tmp0, i64* undef, align 4 - ret void -} diff --git a/test/Transforms/SSI/ssiphi.ll b/test/Transforms/SSI/ssiphi.ll deleted file mode 100644 index a42b70c3c021..000000000000 --- a/test/Transforms/SSI/ssiphi.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -S | FileCheck %s - -declare void @use(i32) -declare i32 @create() - -define i32 @foo() { -entry: - %x = call i32 @create() - %y = icmp slt i32 %x, 10 - br i1 %y, label %T, label %F -T: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -F: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -join: -; CHECK: SSI_phi - ret i32 %x -} diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll index 4f875b0841b2..fe55426b24a1 100644 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ b/test/Transforms/ScalarRepl/vector_promote.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>} +; RUN: opt < %s -scalarrepl -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-apple-darwin10.0.0" -define void @test(<4 x float>* %F, float %f) { +define void @test1(<4 x float>* %F, float %f) { entry: %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] @@ -14,6 +14,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test1 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0 } define void @test2(<4 x float>* %F, float %f) { @@ -28,6 +33,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test2 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2 } define void @test3(<4 x float>* %F, float* %f) { @@ -40,6 +50,11 @@ entry: %tmp.upgrd.4 = load float* %tmp.upgrd.3 ; <float> [#uses=1] store float %tmp.upgrd.4, float* %f ret void +; CHECK: @test3 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2 } define void @test4(<4 x float>* %F, float* %f) { @@ -52,6 +67,11 @@ entry: %tmp.upgrd.6 = load float* %G.upgrd.5 ; <float> [#uses=1] store float %tmp.upgrd.6, float* %f ret void +; CHECK: @test4 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0 } define i32 @test5(float %X) { ;; should turn into bitcast. @@ -61,5 +81,22 @@ define i32 @test5(float %X) { ;; should turn into bitcast. %a = bitcast float* %X1 to i32* %tmp = load i32* %a ret i32 %tmp +; CHECK: @test5 +; CHECK-NEXT: bitcast float %X to i32 +; CHECK-NEXT: ret i32 +} + + +;; should not turn into <1 x i64> - It is a banned MMX datatype. +;; rdar://8380055 +define i64 @test6(<2 x float> %X) { + %X_addr = alloca <2 x float> + store <2 x float> %X, <2 x float>* %X_addr + %P = bitcast <2 x float>* %X_addr to i64* + %tmp = load i64* %P + ret i64 %tmp +; CHECK: @test6 +; CHECK-NEXT: bitcast <2 x float> %X to i64 +; CHECK-NEXT: ret i64 } diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll index ba33d84f84aa..9c15efccd275 100644 --- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll +++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -simplifycfg -disable-output ; PR2256 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-mingw32" +target triple = "x86_64-pc-mingw32" define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval %Z, i1 %cond) nounwind { bb: ; preds = %entry diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll index 83a9fa7ad1b8..7315ff66bd12 100644 --- a/test/Transforms/SimplifyCFG/basictest.ll +++ b/test/Transforms/SimplifyCFG/basictest.ll @@ -54,6 +54,5 @@ bb1: ; preds = %entry return: ; preds = %entry ret void ; CHECK: @test5 -; CHECK-NEXT: bb: ; CHECK-NEXT: ret void } diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll new file mode 100644 index 000000000000..de4f5b607551 --- /dev/null +++ b/test/Transforms/SimplifyCFG/indirectbr.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s + +; SimplifyCFG should eliminate redundant indirectbr edges. + +; CHECK: indbrtest0 +; CHECK: indirectbr i8* %t, [label %BB0, label %BB1, label %BB2] +; CHECK: %x = phi i32 [ 0, %BB0 ], [ 1, %entry ] + +declare void @foo() +declare void @A() +declare void @B(i32) +declare void @C() + +define void @indbrtest0(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest0, %BB0), i8** %P + store i8* blockaddress(@indbrtest0, %BB1), i8** %P + store i8* blockaddress(@indbrtest0, %BB2), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2] +BB0: + call void @A() + br label %BB1 +BB1: + %x = phi i32 [ 0, %BB0 ], [ 1, %entry ], [ 1, %entry ] + call void @B(i32 %x) + ret void +BB2: + call void @C() + ret void +} + +; SimplifyCFG should convert the indirectbr into a directbr. It would be even +; better if it removed the branch altogether, but simplifycfdg currently misses +; that because the predecessor is the entry block. + +; CHECK: indbrtest1 +; CHECK: br label %BB0 + +define void @indbrtest1(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest1, %BB0), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + call void @A() + ret void +} + +; SimplifyCFG should notice that BB0 does not have its address taken and +; remove it from entry's successor list. + +; CHECK: indbrtest2 +; CHECK: entry: +; CHECK-NEXT: unreachable + +define void @indbrtest2(i8* %t) { +entry: + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + ret void +} diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll new file mode 100644 index 000000000000..3965c3782276 --- /dev/null +++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll @@ -0,0 +1,19 @@ +; RUN: opt -strip-dead-debug-info -disable-output %s +define i32 @foo() nounwind ssp { +entry: + ret i32 0, !dbg !8 +} + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!6} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !"clang version 2.8 (trunk 112062)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ] +!7 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ] +!8 = metadata !{i32 3, i32 13, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll index b2a9ed2813d6..9475f87e8f5b 100644 --- a/test/Transforms/TailCallElim/accum_recursion.ll +++ b/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,15 +1,74 @@ -; RUN: opt < %s -tailcallelim -S | not grep call +; RUN: opt < %s -tailcallelim -S | FileCheck %s -define i32 @factorial(i32 %x) { +define i32 @test1_factorial(i32 %x) { entry: %tmp.1 = icmp sgt i32 %x, 0 ; <i1> [#uses=1] br i1 %tmp.1, label %then, label %else then: ; preds = %entry %tmp.6 = add i32 %x, -1 ; <i32> [#uses=1] - %tmp.4 = call i32 @factorial( i32 %tmp.6 ) ; <i32> [#uses=1] + %tmp.4 = call i32 @test1_factorial( i32 %tmp.6 ) ; <i32> [#uses=1] %tmp.7 = mul i32 %tmp.4, %x ; <i32> [#uses=1] ret i32 %tmp.7 else: ; preds = %entry ret i32 1 } +; CHECK: define i32 @test1_factorial +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: else: + +; This is a more aggressive form of accumulator recursion insertion, which +; requires noticing that X doesn't change as we perform the tailcall. + +define i32 @test2_mul(i32 %x, i32 %y) { +entry: + %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + br i1 %tmp.1, label %return, label %endif +endif: ; preds = %entry + %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] + %tmp.5 = call i32 @test2_mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] + %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] + ret i32 %tmp.9 +return: ; preds = %entry + ret i32 %x +} + +; CHECK: define i32 @test2_mul +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: return: + + +define i64 @test3_fib(i64 %n) nounwind readnone { +; CHECK: @test3_fib +entry: +; CHECK: tailrecurse: +; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] +; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] + switch i64 %n, label %bb1 [ +; CHECK: switch i64 %n.tr, label %bb1 [ + i64 0, label %bb2 + i64 1, label %bb2 + ] + +bb1: +; CHECK: bb1: + %0 = add i64 %n, -1 +; CHECK: %0 = add i64 %n.tr, -1 + %1 = tail call i64 @test3_fib(i64 %0) nounwind +; CHECK: %1 = tail call i64 @test3_fib(i64 %0) + %2 = add i64 %n, -2 +; CHECK: %2 = add i64 %n.tr, -2 + %3 = tail call i64 @test3_fib(i64 %2) nounwind +; CHECK-NOT: tail call i64 @test3_fib + %4 = add nsw i64 %3, %1 +; CHECK: add nsw i64 %accumulator.tr, %1 + ret i64 %4 +; CHECK: br label %tailrecurse + +bb2: +; CHECK: bb2: + ret i64 %n +; CHECK: ret i64 %accumulator.tr +} diff --git a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll deleted file mode 100644 index 2a90cf3b22d7..000000000000 --- a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll +++ /dev/null @@ -1,20 +0,0 @@ -; This is a more aggressive form of accumulator recursion insertion, which -; requires noticing that X doesn't change as we perform the tailcall. Thanks -; go out to the anonymous users of the demo script for "suggesting" -; optimizations that should be done. :) - -; RUN: opt < %s -tailcallelim -S | not grep call - -define i32 @mul(i32 %x, i32 %y) { -entry: - %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif -endif: ; preds = %entry - %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] - %tmp.5 = call i32 @mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] - %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] - ret i32 %tmp.9 -return: ; preds = %entry - ret i32 %x -} - diff --git a/test/Transforms/TailCallElim/switch.ll b/test/Transforms/TailCallElim/switch.ll deleted file mode 100644 index 33884318b0c8..000000000000 --- a/test/Transforms/TailCallElim/switch.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt %s -tailcallelim -S | FileCheck %s - -define i64 @fib(i64 %n) nounwind readnone { -; CHECK: @fib -entry: -; CHECK: tailrecurse: -; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] -; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] - switch i64 %n, label %bb1 [ -; CHECK: switch i64 %n.tr, label %bb1 [ - i64 0, label %bb2 - i64 1, label %bb2 - ] - -bb1: -; CHECK: bb1: - %0 = add i64 %n, -1 -; CHECK: %0 = add i64 %n.tr, -1 - %1 = tail call i64 @fib(i64 %0) nounwind -; CHECK: %1 = tail call i64 @fib(i64 %0) - %2 = add i64 %n, -2 -; CHECK: %2 = add i64 %n.tr, -2 - %3 = tail call i64 @fib(i64 %2) nounwind -; CHECK-NOT: tail call i64 @fib - %4 = add nsw i64 %3, %1 -; CHECK: add nsw i64 %accumulator.tr, %1 - ret i64 %4 -; CHECK: br label %tailrecurse - -bb2: -; CHECK: bb2: - ret i64 %n -; CHECK: ret i64 %accumulator.tr -} diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll index 88a565684c5d..03e99bc9bf6a 100644 --- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll +++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate +; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output |& not grep tailduplicate ; XFAIL: * define i32 @foo(i32 %l) nounwind { diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll new file mode 100644 index 000000000000..bf5563d9c051 --- /dev/null +++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll @@ -0,0 +1,21 @@ +; RUN: not llvm-as < %s 2> %t +; RUN: grep {Broken module} %t +; PR7316 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" +target triple = "x86-unknown-unknown" +@aa = global [32 x i8] zeroinitializer, align 1 +@bb = global [16 x i8] zeroinitializer, align 1 +define void @x() nounwind { +L.0: + %0 = getelementptr [32 x i8]* @aa, i32 0, i32 4 + %1 = bitcast i8* %0 to [16 x i8]* + %2 = bitcast [16 x i8]* %1 to [0 x i8]* + %3 = getelementptr [16 x i8]* @bb + %4 = bitcast [16 x i8]* %3 to [0 x i8]* + call void @llvm.memcpy.i32([0 x i8]* %2, [0 x i8]* %4, i32 16, i32 1) + br label %return +return: + ret void +} +declare void @llvm.memcpy.i32([0 x i8]*, [0 x i8]*, i32, i32) nounwind diff --git a/test/lit.cfg b/test/lit.cfg index 5e7e0e444980..f15777c99912 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -46,7 +46,16 @@ if llvm_obj_root is not None: config.environment['PATH'] = path # Propogate 'HOME' through the environment. -config.environment['HOME'] = os.environ['HOME'] +if 'HOME' in os.environ: + config.environment['HOME'] = os.environ['HOME'] + +# Propogate 'INCLUDE' through the environment. +if 'INCLUDE' in os.environ: + config.environment['INCLUDE'] = os.environ['INCLUDE'] + +# Propogate 'LIB' through the environment. +if 'LIB' in os.environ: + config.environment['LIB'] = os.environ['LIB'] # Propogate LLVM_SRC_ROOT into the environment. config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '') @@ -110,7 +119,7 @@ import re site_exp = {} # FIXME: Implement lit.site.cfg. for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')): - m = re.match('set ([^ ]+) "([^"]*)"', line) + m = re.match('set ([^ ]+) "(.*)"', line) if m: site_exp[m.group(1)] = m.group(2) @@ -147,13 +156,13 @@ def llvm_supports_target(name): def llvm_supports_darwin_and_target(name): return 'darwin' in config.target_triple and llvm_supports_target(name) -langs = set(site_exp['llvmgcc_langs'].split(',')) +langs = set([s.strip() for s in site_exp['llvmgcc_langs'].split(',')]) def llvm_gcc_supports(name): - return name in langs + return name.strip() in langs -bindings = set(site_exp['llvm_bindings'].split(',')) +bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')]) def llvm_supports_binding(name): - return name in bindings + return name.strip() in bindings # Provide on_clone hook for reading 'dg.exp'. import os |