diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-07-05 14:23:59 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-07-05 14:23:59 +0000 |
commit | c192b3dcffd5e672a2b2e1730e2440febb4fb192 (patch) | |
tree | ac719b5984165053bf83d71142e4d96b609b9784 /lib/Headers | |
parent | 2e645aa5697838f16ec570eb07c2bee7e13d0e0b (diff) | |
download | src-c192b3dcffd5e672a2b2e1730e2440febb4fb192.tar.gz src-c192b3dcffd5e672a2b2e1730e2440febb4fb192.zip |
Notes
Diffstat (limited to 'lib/Headers')
38 files changed, 4468 insertions, 1876 deletions
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 5f8857c41b49..87afc60de996 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -5,6 +5,7 @@ set(files arm_acle.h avx2intrin.h avx512bwintrin.h + avx512cdintrin.h avx512erintrin.h avx512fintrin.h avx512vlbwintrin.h @@ -21,11 +22,13 @@ set(files float.h fma4intrin.h fmaintrin.h + fxsrintrin.h htmintrin.h htmxlintrin.h ia32intrin.h immintrin.h Intrin.h + inttypes.h iso646.h limits.h lzcntintrin.h diff --git a/lib/Headers/Intrin.h b/lib/Headers/Intrin.h index dd04e06ad8bc..7ba311ea4b94 100644 --- a/lib/Headers/Intrin.h +++ b/lib/Headers/Intrin.h @@ -40,7 +40,7 @@ #endif /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #ifdef __cplusplus extern "C" { @@ -180,8 +180,6 @@ unsigned long __cdecl _byteswap_ulong(unsigned long); unsigned short __cdecl _byteswap_ushort(unsigned short); void __cdecl _disable(void); void __cdecl _enable(void); -void __cdecl _fxrstor(void const *); -void __cdecl _fxsave(void *); long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); static __inline__ long _InterlockedAnd(long volatile *_Value, long _Mask); @@ -358,8 +356,6 @@ unsigned char _bittestandreset64(__int64 *, __int64); static __inline__ unsigned char _bittestandset64(__int64 *, __int64); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); -void __cdecl _fxrstor64(void const *); -void __cdecl _fxsave64(void *); long _InterlockedAnd_np(long volatile *_Value, long _Mask); short _InterlockedAnd16_np(short volatile *_Value, short _Mask); __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); @@ -424,7 +420,7 @@ unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); * Multiply two 64-bit integers and obtain a 64-bit result. 
* The low-half is returned directly and the high half is in an out parameter. */ -static __inline__ unsigned __int64 DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, unsigned __int64 *_HighProduct) { unsigned __int128 _FullProduct = @@ -432,7 +428,7 @@ _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, *_HighProduct = _FullProduct >> 64; return _FullProduct; } -static __inline__ unsigned __int64 DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) { unsigned __int128 _FullProduct = (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; @@ -447,54 +443,54 @@ void __cdecl _xsaveopt64(void *, unsigned __int64); /*----------------------------------------------------------------------------*\ |* Bit Twiddling \*----------------------------------------------------------------------------*/ -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _rotl8(unsigned char _Value, unsigned char _Shift) { _Shift &= 0x7; return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _rotr8(unsigned char _Value, unsigned char _Shift) { _Shift &= 0x7; return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; } -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS _rotl16(unsigned short _Value, unsigned char _Shift) { _Shift &= 0xf; return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; } -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS _rotr16(unsigned short _Value, unsigned char _Shift) { _Shift &= 0xf; return _Shift ? 
(_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _rotl(unsigned int _Value, int _Shift) { _Shift &= 0x1f; return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _rotr(unsigned int _Value, int _Shift) { _Shift &= 0x1f; return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; } -static __inline__ unsigned long DEFAULT_FN_ATTRS +static __inline__ unsigned long __DEFAULT_FN_ATTRS _lrotl(unsigned long _Value, int _Shift) { _Shift &= 0x1f; return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; } -static __inline__ unsigned long DEFAULT_FN_ATTRS +static __inline__ unsigned long __DEFAULT_FN_ATTRS _lrotr(unsigned long _Value, int _Shift) { _Shift &= 0x1f; return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; } static -__inline__ unsigned __int64 DEFAULT_FN_ATTRS +__inline__ unsigned __int64 __DEFAULT_FN_ATTRS _rotl64(unsigned __int64 _Value, int _Shift) { _Shift &= 0x3f; return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; } static -__inline__ unsigned __int64 DEFAULT_FN_ATTRS +__inline__ unsigned __int64 __DEFAULT_FN_ATTRS _rotr64(unsigned __int64 _Value, int _Shift) { _Shift &= 0x3f; return _Shift ? 
(_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; @@ -502,52 +498,52 @@ _rotr64(unsigned __int64 _Value, int _Shift) { /*----------------------------------------------------------------------------*\ |* Bit Counting and Testing \*----------------------------------------------------------------------------*/ -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _BitScanForward(unsigned long *_Index, unsigned long _Mask) { if (!_Mask) return 0; *_Index = __builtin_ctzl(_Mask); return 1; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _BitScanReverse(unsigned long *_Index, unsigned long _Mask) { if (!_Mask) return 0; *_Index = 31 - __builtin_clzl(_Mask); return 1; } -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS __popcnt16(unsigned short value) { return __builtin_popcount((int)value); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __popcnt(unsigned int value) { return __builtin_popcount(value); } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittest(long const *a, long b) { return (*a >> b) & 1; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandcomplement(long *a, long b) { unsigned char x = (*a >> b) & 1; *a = *a ^ (1 << b); return x; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandreset(long *a, long b) { unsigned char x = (*a >> b) & 1; *a = *a & ~(1 << b); return x; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandset(long *a, long b) { unsigned char x = (*a >> b) & 1; *a = *a | (1 << b); return x; } #if defined(__i386__) || defined(__x86_64__) -static __inline__ unsigned char DEFAULT_FN_ATTRS 
+static __inline__ unsigned char __DEFAULT_FN_ATTRS _interlockedbittestandset(long volatile *__BitBase, long __BitPos) { unsigned char __Res; __asm__ ("xor %0, %0\n" @@ -559,14 +555,14 @@ _interlockedbittestandset(long volatile *__BitBase, long __BitPos) { } #endif #ifdef __x86_64__ -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { if (!_Mask) return 0; *_Index = __builtin_ctzll(_Mask); return 1; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { if (!_Mask) return 0; @@ -574,33 +570,33 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { return 1; } static __inline__ -unsigned __int64 DEFAULT_FN_ATTRS +unsigned __int64 __DEFAULT_FN_ATTRS __popcnt64(unsigned __int64 value) { return __builtin_popcountll(value); } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittest64(__int64 const *a, __int64 b) { return (*a >> b) & 1; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandcomplement64(__int64 *a, __int64 b) { unsigned char x = (*a >> b) & 1; *a = *a ^ (1ll << b); return x; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandreset64(__int64 *a, __int64 b) { unsigned char x = (*a >> b) & 1; *a = *a & ~(1ll << b); return x; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _bittestandset64(__int64 *a, __int64 b) { unsigned char x = (*a >> b) & 1; *a = *a | (1ll << b); return x; } -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) { unsigned char __Res; __asm__ 
("xor %0, %0\n" @@ -614,16 +610,16 @@ _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) { /*----------------------------------------------------------------------------*\ |* Interlocked Exchange Add \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { return __atomic_add_fetch(_Addend, _Value, 0) - _Value; } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { return __atomic_add_fetch(_Addend, _Value, 0) - _Value; } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { return __atomic_add_fetch(_Addend, _Value, 0) - _Value; } @@ -631,20 +627,20 @@ _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { /*----------------------------------------------------------------------------*\ |* Interlocked Exchange Sub \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchangeSub8(char volatile *_Subend, char _Value) { return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchangeSub16(short volatile *_Subend, short _Value) { return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; } -static __inline__ long DEFAULT_FN_ATTRS +static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchangeSub(long volatile *_Subend, long _Value) { return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS 
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; } @@ -652,12 +648,12 @@ _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { /*----------------------------------------------------------------------------*\ |* Interlocked Increment \*----------------------------------------------------------------------------*/ -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedIncrement16(short volatile *_Value) { return __atomic_add_fetch(_Value, 1, 0); } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedIncrement64(__int64 volatile *_Value) { return __atomic_add_fetch(_Value, 1, 0); } @@ -665,12 +661,12 @@ _InterlockedIncrement64(__int64 volatile *_Value) { /*----------------------------------------------------------------------------*\ |* Interlocked Decrement \*----------------------------------------------------------------------------*/ -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedDecrement16(short volatile *_Value) { return __atomic_sub_fetch(_Value, 1, 0); } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedDecrement64(__int64 volatile *_Value) { return __atomic_sub_fetch(_Value, 1, 0); } @@ -678,20 +674,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) { /*----------------------------------------------------------------------------*\ |* Interlocked And \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedAnd8(char volatile *_Value, char _Mask) { return __atomic_and_fetch(_Value, _Mask, 0); } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedAnd16(short volatile 
*_Value, short _Mask) { return __atomic_and_fetch(_Value, _Mask, 0); } -static __inline__ long DEFAULT_FN_ATTRS +static __inline__ long __DEFAULT_FN_ATTRS _InterlockedAnd(long volatile *_Value, long _Mask) { return __atomic_and_fetch(_Value, _Mask, 0); } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { return __atomic_and_fetch(_Value, _Mask, 0); } @@ -699,20 +695,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { /*----------------------------------------------------------------------------*\ |* Interlocked Or \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedOr8(char volatile *_Value, char _Mask) { return __atomic_or_fetch(_Value, _Mask, 0); } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedOr16(short volatile *_Value, short _Mask) { return __atomic_or_fetch(_Value, _Mask, 0); } -static __inline__ long DEFAULT_FN_ATTRS +static __inline__ long __DEFAULT_FN_ATTRS _InterlockedOr(long volatile *_Value, long _Mask) { return __atomic_or_fetch(_Value, _Mask, 0); } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { return __atomic_or_fetch(_Value, _Mask, 0); } @@ -720,20 +716,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { /*----------------------------------------------------------------------------*\ |* Interlocked Xor \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedXor8(char volatile *_Value, char _Mask) { return __atomic_xor_fetch(_Value, _Mask, 0); } -static __inline__ short DEFAULT_FN_ATTRS +static 
__inline__ short __DEFAULT_FN_ATTRS _InterlockedXor16(short volatile *_Value, short _Mask) { return __atomic_xor_fetch(_Value, _Mask, 0); } -static __inline__ long DEFAULT_FN_ATTRS +static __inline__ long __DEFAULT_FN_ATTRS _InterlockedXor(long volatile *_Value, long _Mask) { return __atomic_xor_fetch(_Value, _Mask, 0); } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { return __atomic_xor_fetch(_Value, _Mask, 0); } @@ -741,18 +737,18 @@ _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { /*----------------------------------------------------------------------------*\ |* Interlocked Exchange \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchange8(char volatile *_Target, char _Value) { __atomic_exchange(_Target, &_Value, &_Value, 0); return _Value; } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchange16(short volatile *_Target, short _Value) { __atomic_exchange(_Target, &_Value, &_Value, 0); return _Value; } #ifdef __x86_64__ -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { __atomic_exchange(_Target, &_Value, &_Value, 0); return _Value; @@ -761,19 +757,19 @@ _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange \*----------------------------------------------------------------------------*/ -static __inline__ char DEFAULT_FN_ATTRS +static __inline__ char __DEFAULT_FN_ATTRS _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, char _Comparand) { __atomic_compare_exchange(_Destination, &_Comparand, 
&_Exchange, 0, 0, 0); return _Comparand; } -static __inline__ short DEFAULT_FN_ATTRS +static __inline__ short __DEFAULT_FN_ATTRS _InterlockedCompareExchange16(short volatile *_Destination, short _Exchange, short _Comparand) { __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); return _Comparand; } -static __inline__ __int64 DEFAULT_FN_ATTRS +static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); @@ -783,24 +779,24 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination, |* Barriers \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadWriteBarrier(void) { __asm__ volatile ("" : : : "memory"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void) { __asm__ volatile ("" : : : "memory"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _WriteBarrier(void) { __asm__ volatile ("" : : : "memory"); } #endif #ifdef __x86_64__ -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __faststorefence(void) { __asm__ volatile("lock orq $0, (%%rsp)" : : : "memory"); } @@ -815,33 +811,33 @@ __faststorefence(void) { (__offset)) #ifdef __i386__ -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS __readfsbyte(unsigned long __offset) { return *__ptr_to_addr_space(257, unsigned char, __offset); } -static __inline__ unsigned 
__int64 DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readfsqword(unsigned long __offset) { return *__ptr_to_addr_space(257, unsigned __int64, __offset); } -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS __readfsword(unsigned long __offset) { return *__ptr_to_addr_space(257, unsigned short, __offset); } #endif #ifdef __x86_64__ -static __inline__ unsigned char DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS __readgsbyte(unsigned long __offset) { return *__ptr_to_addr_space(256, unsigned char, __offset); } -static __inline__ unsigned long DEFAULT_FN_ATTRS +static __inline__ unsigned long __DEFAULT_FN_ATTRS __readgsdword(unsigned long __offset) { return *__ptr_to_addr_space(256, unsigned long, __offset); } -static __inline__ unsigned __int64 DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readgsqword(unsigned long __offset) { return *__ptr_to_addr_space(256, unsigned __int64, __offset); } -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS __readgsword(unsigned long __offset) { return *__ptr_to_addr_space(256, unsigned short, __offset); } @@ -851,44 +847,44 @@ __readgsword(unsigned long __offset) { |* movs, stos \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { __asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n) : "%edi", "%esi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { __asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n) : "%edi", "%esi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void 
__DEFAULT_FN_ATTRS __movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { __asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n) : "%edi", "%esi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __stosb(unsigned char *__dst, unsigned char __x, size_t __n) { __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, unsigned short __x, size_t __n) { __asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); } #endif #ifdef __x86_64__ -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) { __asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n) : "%edi", "%esi", "%ecx"); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { __asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); @@ -898,32 +894,32 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { /*----------------------------------------------------------------------------*\ |* Misc \*----------------------------------------------------------------------------*/ -static __inline__ void * DEFAULT_FN_ATTRS +static __inline__ void * __DEFAULT_FN_ATTRS _AddressOfReturnAddress(void) { return (void*)((char*)__builtin_frame_address(0) + sizeof(void*)); } -static __inline__ void * DEFAULT_FN_ATTRS +static __inline__ void * __DEFAULT_FN_ATTRS _ReturnAddress(void) { return __builtin_return_address(0); } #if defined(__i386__) || defined(__x86_64__) 
-static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __cpuid(int __info[4], int __level) { __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) : "a"(__level)); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __cpuidex(int __info[4], int __level, int __ecx) { __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) : "a"(__level), "c"(__ecx)); } -static __inline__ unsigned __int64 __cdecl DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS _xgetbv(unsigned int __xcr_no) { unsigned int __eax, __edx; __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no)); return ((unsigned __int64)__edx << 32) | __eax; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile ("hlt"); } @@ -933,7 +929,7 @@ __halt(void) { |* Privileged intrinsics \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) -static __inline__ unsigned __int64 DEFAULT_FN_ATTRS +static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readmsr(unsigned long __register) { // Loads the contents of a 64-bit model specific register (MSR) specified in // the ECX register into registers EDX:EAX. 
The EDX register is loaded with @@ -947,14 +943,14 @@ __readmsr(unsigned long __register) { return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; } -static __inline__ unsigned long DEFAULT_FN_ATTRS +static __inline__ unsigned long __DEFAULT_FN_ATTRS __readcr3(void) { unsigned long __cr3_val; __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory"); return __cr3_val; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS __writecr3(unsigned int __cr3_val) { __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory"); } @@ -964,7 +960,7 @@ __writecr3(unsigned int __cr3_val) { } #endif -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __INTRIN_H */ #endif /* _MSC_VER */ diff --git a/lib/Headers/__wmmintrin_aes.h b/lib/Headers/__wmmintrin_aes.h index 17b3f1d55f76..81b2b8d0b0a4 100644 --- a/lib/Headers/__wmmintrin_aes.h +++ b/lib/Headers/__wmmintrin_aes.h @@ -26,33 +26,33 @@ #include <emmintrin.h> /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenc_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenc128(__V, __R); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenclast128(__V, __R); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdec_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdec128(__V, __R); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdeclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdeclast128(__V, __R); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesimc_si128(__m128i __V) { return (__m128i)__builtin_ia32_aesimc128(__V); @@ -61,6 +61,6 @@ _mm_aesimc_si128(__m128i __V) #define _mm_aeskeygenassist_si128(C, R) \ __builtin_ia32_aeskeygenassist128((C), (R)) -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* _WMMINTRIN_AES_H */ diff --git a/lib/Headers/adxintrin.h b/lib/Headers/adxintrin.h index 050dc8aea8b9..ee347284178e 100644 --- a/lib/Headers/adxintrin.h +++ b/lib/Headers/adxintrin.h @@ -29,7 +29,7 @@ #define __ADXINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) /* Intrinsics that are available only if __ADX__ defined */ static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx"))) @@ -49,7 +49,7 @@ _addcarryx_u64(unsigned char __cf, unsigned long long __x, #endif /* Intrinsics that are also available if __ADX__ undefined */ -static __inline unsigned char DEFAULT_FN_ATTRS +static __inline unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { @@ -57,7 +57,7 @@ _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, } #ifdef __x86_64__ -static __inline unsigned char DEFAULT_FN_ATTRS +static __inline unsigned char __DEFAULT_FN_ATTRS _addcarry_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { @@ -65,7 +65,7 @@ _addcarry_u64(unsigned char __cf, unsigned long long __x, } #endif -static __inline unsigned char DEFAULT_FN_ATTRS +static __inline unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { @@ -73,7 +73,7 @@ _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, } #ifdef __x86_64__ -static __inline unsigned char DEFAULT_FN_ATTRS +static __inline unsigned char __DEFAULT_FN_ATTRS _subborrow_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { @@ -81,6 +81,6 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x, } #endif -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __ADXINTRIN_H */ diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index 28df89057205..2c80e24db335 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -110,14 +110,28 @@ static vector signed int __ATTRS_o_ai vec_abs(vector signed int __a) { return 
__builtin_altivec_vmaxsw(__a, -__a); } +#if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static vector signed long long __ATTRS_o_ai +vec_abs(vector signed long long __a) { + return __builtin_altivec_vmaxsd(__a, -__a); +} +#endif + static vector float __ATTRS_o_ai vec_abs(vector float __a) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)(0x7FFFFFFF); return (vector float)__res; } -/* vec_abss */ +#if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static vector double __ATTRS_o_ai vec_abs(vector double __a) { + vector unsigned long long __res = { 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF }; + __res &= (vector unsigned int)__a; + return (vector double)__res; +} +#endif +/* vec_abss */ #define __builtin_altivec_abss_v16qi vec_abss #define __builtin_altivec_abss_v8hi vec_abss #define __builtin_altivec_abss_v4si vec_abss @@ -226,6 +240,16 @@ static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a, } #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static vector signed long long __ATTRS_o_ai +vec_add(vector signed long long __a, vector signed long long __b) { + return __a + __b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_add(vector unsigned long long __a, vector unsigned long long __b) { + return __a + __b; +} + static vector signed __int128 __ATTRS_o_ai vec_add(vector signed __int128 __a, vector signed __int128 __b) { return __a + __b; @@ -241,6 +265,13 @@ static vector float __ATTRS_o_ai vec_add(vector float __a, vector float __b) { return __a + __b; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_add(vector double __a, vector double __b) { + return __a + __b; +} +#endif // __VSX__ + /* vec_vaddubm */ #define __builtin_altivec_vaddubm vec_vaddubm @@ -746,6 +777,24 @@ static vector float __ATTRS_o_ai vec_and(vector float __a, } #ifdef __VSX__ +static vector double __ATTRS_o_ai vec_and(vector bool long long __a, vector double __b) { + vector unsigned long long __res = + (vector 
unsigned long long)__a & (vector unsigned long long)__b; + return (vector double)__res; +} + +static vector double __ATTRS_o_ai vec_and(vector double __a, vector bool long long __b) { + vector unsigned long long __res = + (vector unsigned long long)__a & (vector unsigned long long)__b; + return (vector double)__res; +} + +static vector double __ATTRS_o_ai vec_and(vector double __a, vector double __b) { + vector unsigned long long __res = + (vector unsigned long long)__a & (vector unsigned long long)__b; + return (vector double)__res; +} + static vector signed long long __ATTRS_o_ai vec_and(vector signed long long __a, vector signed long long __b) { return __a & __b; @@ -1068,6 +1117,26 @@ static vector float __ATTRS_o_ai vec_andc(vector float __a, } #ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_andc(vector bool long long __a, vector double __b) { + vector unsigned long long __res = + (vector unsigned long long)__a & ~(vector unsigned long long)__b; + return (vector double)__res; +} + +static vector double __ATTRS_o_ai +vec_andc(vector double __a, vector bool long long __b) { + vector unsigned long long __res = + (vector unsigned long long)__a & ~(vector unsigned long long)__b; + return (vector double)__res; +} + +static vector double __ATTRS_o_ai vec_andc(vector double __a, vector double __b) { + vector unsigned long long __res = + (vector unsigned long long)__a & ~(vector unsigned long long)__b; + return (vector double)__res; +} + static vector signed long long __ATTRS_o_ai vec_andc(vector signed long long __a, vector signed long long __b) { return __a & ~__b; @@ -1338,11 +1407,20 @@ vec_vavguw(vector unsigned int __a, vector unsigned int __b) { /* vec_ceil */ -static vector float __attribute__((__always_inline__)) -vec_ceil(vector float __a) { +static vector float __ATTRS_o_ai vec_ceil(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspip(__a); +#else return __builtin_altivec_vrfip(__a); +#endif } +#ifdef __VSX__ +static vector double 
__ATTRS_o_ai vec_ceil(vector double __a) { + return __builtin_vsx_xvrdpip(__a); +} +#endif + /* vec_vrfip */ static vector float __attribute__((__always_inline__)) @@ -1414,16 +1492,56 @@ vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { static vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a, vector float __b) { +#ifdef __VSX__ + return (vector bool int)__builtin_vsx_xvcmpeqsp(__a, __b); +#else return (vector bool int)__builtin_altivec_vcmpeqfp(__a, __b); +#endif +} + +#ifdef __VSX__ +static vector bool long long __ATTRS_o_ai +vec_cmpeq(vector double __a, vector double __b) { + return (vector bool long long)__builtin_vsx_xvcmpeqdp(__a, __b); } +#endif /* vec_cmpge */ -static vector bool int __attribute__((__always_inline__)) +static vector bool int __ATTRS_o_ai vec_cmpge(vector float __a, vector float __b) { +#ifdef __VSX__ + return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b); +#else return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b); +#endif } +#ifdef __VSX__ +static vector bool long long __ATTRS_o_ai +vec_cmpge(vector double __a, vector double __b) { + return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b); +} +#endif + +#ifdef __POWER8_VECTOR__ +/* Forward declarations as the functions are used here */ +static vector bool long long __ATTRS_o_ai +vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b); +static vector bool long long __ATTRS_o_ai +vec_cmpgt(vector signed long long __a, vector signed long long __b); + +static vector bool long long __ATTRS_o_ai +vec_cmpge(vector signed long long __a, vector signed long long __b) { + return ~(vec_cmpgt(__b, __a)); +} + +static vector bool long long __ATTRS_o_ai +vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { + return ~(vec_cmpgt(__b, __a)); +} +#endif + /* vec_vcmpgefp */ static vector bool int __attribute__((__always_inline__)) @@ -1476,9 +1594,19 @@ vec_cmpgt(vector unsigned long long __a, vector unsigned long long
__b) { static vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a, vector float __b) { +#ifdef __VSX__ + return (vector bool int)__builtin_vsx_xvcmpgtsp(__a, __b); +#else return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b); +#endif } +#ifdef __VSX__ +static vector bool long long __ATTRS_o_ai +vec_cmpgt(vector double __a, vector double __b) { + return (vector bool long long)__builtin_vsx_xvcmpgtdp(__a, __b); +} +#endif /* vec_vcmpgtsb */ static vector bool char __attribute__((__always_inline__)) @@ -1530,47 +1658,85 @@ vec_vcmpgtfp(vector float __a, vector float __b) { /* vec_cmple */ -static vector bool int __attribute__((__always_inline__)) +static vector bool int __ATTRS_o_ai vec_cmple(vector float __a, vector float __b) { - return (vector bool int)__builtin_altivec_vcmpgefp(__b, __a); + return vec_cmpge(__b, __a); } +#ifdef __VSX__ +static vector bool long long __ATTRS_o_ai +vec_cmple(vector double __a, vector double __b) { + return vec_cmpge(__b, __a); +} +#endif + +#ifdef __POWER8_VECTOR__ +static vector bool long long __ATTRS_o_ai +vec_cmple(vector signed long long __a, vector signed long long __b) { + return vec_cmpge(__b, __a); +} + +static vector bool long long __ATTRS_o_ai +vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { + return vec_cmpge(__b, __a); +} +#endif + /* vec_cmplt */ static vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b) { - return (vector bool char)__builtin_altivec_vcmpgtsb(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)__builtin_altivec_vcmpgtub(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool short __ATTRS_o_ai vec_cmplt(vector short __a, vector short __b) { - return (vector bool short)__builtin_altivec_vcmpgtsh(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short __a, 
vector unsigned short __b) { - return (vector bool short)__builtin_altivec_vcmpgtuh(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool int __ATTRS_o_ai vec_cmplt(vector int __a, vector int __b) { - return (vector bool int)__builtin_altivec_vcmpgtsw(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)__builtin_altivec_vcmpgtuw(__b, __a); + return vec_cmpgt(__b, __a); } static vector bool int __ATTRS_o_ai vec_cmplt(vector float __a, vector float __b) { - return (vector bool int)__builtin_altivec_vcmpgtfp(__b, __a); + return vec_cmpgt(__b, __a); +} + +#ifdef __VSX__ +static vector bool long long __ATTRS_o_ai +vec_cmplt(vector double __a, vector double __b) { + return vec_cmpgt(__b, __a); +} +#endif + +#ifdef __POWER8_VECTOR__ +static vector bool long long __ATTRS_o_ai +vec_cmplt(vector signed long long __a, vector signed long long __b) { + return vec_cmpgt(__b, __a); } +static vector bool long long __ATTRS_o_ai +vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { + return vec_cmpgt(__b, __a); +} +#endif + /* vec_ctf */ static vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) { @@ -3014,6 +3180,56 @@ static vector float __ATTRS_o_ai vec_vmrglw(vector float __a, 0x1C, 0x1D, 0x1E, 0x1F)); } + +#ifdef __POWER8_VECTOR__ +/* vec_mergee */ + +static vector bool int __ATTRS_o_ai +vec_mergee(vector bool int __a, vector bool int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); +} + +static vector signed int __ATTRS_o_ai +vec_mergee(vector signed int __a, vector signed int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); +} + +static vector unsigned int __ATTRS_o_ai +vec_mergee(vector unsigned int __a, 
vector unsigned int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); +} + +/* vec_mergeo */ + +static vector bool int __ATTRS_o_ai +vec_mergeo(vector bool int __a, vector bool int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector signed int __ATTRS_o_ai +vec_mergeo(vector signed int __a, vector signed int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector unsigned int __ATTRS_o_ai +vec_mergeo(vector unsigned int __a, vector unsigned int __b) { + return vec_perm(__a, __b, (vector unsigned char) + (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +#endif + /* vec_mfvscr */ static vector unsigned short __attribute__((__always_inline__)) diff --git a/lib/Headers/ammintrin.h b/lib/Headers/ammintrin.h index 3f3820576671..91c633305b8b 100644 --- a/lib/Headers/ammintrin.h +++ b/lib/Headers/ammintrin.h @@ -27,7 +27,7 @@ #include <pmmintrin.h> /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) /// \brief Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index idx and of the length len. @@ -80,7 +80,7 @@ /// non-zero, the result is undefined. /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted /// from the source operand. 
-static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_extract_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); @@ -156,7 +156,7 @@ _mm_extract_si64(__m128i __x, __m128i __y) /// lower bits of source operand __y. The upper 64 bits of the return value /// are undefined. -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); @@ -177,7 +177,7 @@ _mm_insert_si64(__m128i __x, __m128i __y) /// \param __a /// The 64-bit double-precision floating-point register value to /// be stored. -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_sd(double *__p, __m128d __a) { __builtin_ia32_movntsd(__p, (__v2df)__a); @@ -198,12 +198,12 @@ _mm_stream_sd(double *__p, __m128d __a) /// \param __a /// The 32-bit single-precision floating-point register value to /// be stored. -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ss(float *__p, __m128 __a) { __builtin_ia32_movntss(__p, (__v4sf)__a); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __AMMINTRIN_H */ diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h index 5f1817e45a7a..cfa91410a338 100644 --- a/lib/Headers/avx2intrin.h +++ b/lib/Headers/avx2intrin.h @@ -29,96 +29,96 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2"))) /* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi8(__m256i __a) { return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi16(__m256i __a) { return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi32(__m256i __a) { return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a + (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a + (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a + 
(__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi64(__m256i __a, __m256i __b) { return __a + __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); @@ -129,31 +129,31 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) __m256i __b = (b); \ (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b) { return __a & __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b) { return ~__a & __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS 
_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) { return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, @@ -181,307 +181,307 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) (((M) & 0x40) ? 30 : 14), \ (((M) & 0x80) ? 31 : 15)); }) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)(__a == __b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a > (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)(__a > __b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_hadd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ 
__m256i __DEFAULT_FN_ATTRS _mm256_hadd_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_hadds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_hsub_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_hsub_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_hsubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maddubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi32(__m256i __a, __m256i __b) { return 
(__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS 
_mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi16(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi32(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static 
__inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhi_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a * (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi32 (__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a * (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b) { return __a | __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sad_epu8(__m256i __a, 
__m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); @@ -523,19 +523,19 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) 8 + (((imm) & 0xc0) >> 6), \ 12, 13, 14, 15); }) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); @@ -547,61 +547,61 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } -static 
__inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256(__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256(__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); @@ -613,175 +613,175 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS 
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256(__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256(__a, __count); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a - (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a - (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a - (__v8si)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi64(__m256i __a, __m256i __b) { return __a - __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epu16(__m256i __a, __m256i 
__b) { return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { 
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b) { return __a ^ __b; } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_stream_load_si256(__m256i *__V) { return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector(__a, __a, 0, 0); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1); @@ -809,56 +809,56 @@ _mm256_broadcastsi128_si256(__m128i __X) (((M) & 0x40) ? 14 : 6), \ (((M) & 0x80) ? 
15 : 7)); }) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_ia32_pbroadcastq256(__X); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_ia32_pbroadcastq128(__X); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); @@ -870,7 +870,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) (M) & 0x3, ((M) & 0xc) >> 2, \ ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutevar8x32_ps(__m256 
__a, __m256 __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b); @@ -903,109 +903,109 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b) (((M) & 1) ? 4 : 2), \ (((M) & 1) ? 5 : 3) );}) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static 
__inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di(__X, __Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di(__X, __Y); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); @@ -1251,6 +1251,6 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) (const __v4di 
*)__m, (__v4di)__i, \ (__v4di)_mm256_set1_epi64x(-1), (s)); }) -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __AVX2INTRIN_H */ diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index 4eb97471781e..b0d3462d4db6 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -34,9 +34,9 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64))); typedef short __v32hi __attribute__ ((__vector_size__ (64))); /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) -static __inline __v64qi DEFAULT_FN_ATTRS +static __inline __v64qi __DEFAULT_FN_ATTRS _mm512_setzero_qi (void) { return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -48,7 +48,7 @@ _mm512_setzero_qi (void) { 0, 0, 0, 0, 0, 0, 0, 0 }; } -static __inline __v32hi DEFAULT_FN_ATTRS +static __inline __v32hi __DEFAULT_FN_ATTRS _mm512_setzero_hi (void) { return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -58,300 +58,300 @@ _mm512_setzero_hi (void) { /* Integer compare */ -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, (__mmask64)-1); } -static __inline__ __mmask64 
DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static 
__inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 
__DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS 
_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS 
_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, __u); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, (__mmask64)-1); } -static __inline__ __mmask64 DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS 
_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, __u); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qi) __A + (__v64qi) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, (__v64qi) __B, @@ -359,7 +359,7 @@ _mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { (__mmask64) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, (__v64qi) __B, @@ -368,12 +368,12 @@ _mm512_maskz_add_epi8 (__mmask64 __U, __m512i 
__A, __m512i __B) { (__mmask64) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qi) __A - (__v64qi) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, (__v64qi) __B, @@ -381,7 +381,7 @@ _mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { (__mmask64) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, (__v64qi) __B, @@ -390,12 +390,12 @@ _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { (__mmask64) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hi) __A + (__v32hi) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -403,7 +403,7 @@ _mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -412,12 +412,12 @@ _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hi) __A - 
(__v32hi) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -425,7 +425,7 @@ _mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -434,12 +434,12 @@ _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hi) __A * (__v32hi) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -447,7 +447,7 @@ _mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, (__v32hi) __B, @@ -456,6 +456,754 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { (__mmask32) __U); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) +{ + return (__m512i) 
__builtin_ia32_blendmw_512_mask ((__v32hi) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi8 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_abs_epi16 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packs_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ 
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) __W, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packs_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packus_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A, + (__v16si) __B, + (__v32hi) __W, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_packus_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) 
__builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask 
((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_adds_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_avg_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgb512_mask 
((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_avg_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, 
+ (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_max_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi 
(), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + 
(__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_min_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shuffle_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); 
+} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epu8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, + (__v64qi) __B, + (__v64qi) _mm512_setzero_qi (), + (__mmask64) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_subs_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) -1); +} + +static 
__inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I, + __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, + (__v32hi) __I /* idx */ , + (__v32hi) __B, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I + /* idx */ , + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), \ @@ -496,6 +1244,7 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)(__m512i)(b), \ (p), (__mmask32)(m)); }) -#undef DEFAULT_FN_ATTRS + +#undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/Headers/avx512cdintrin.h 
b/lib/Headers/avx512cdintrin.h new file mode 100644 index 000000000000..3894b29f5725 --- /dev/null +++ b/lib/Headers/avx512cdintrin.h @@ -0,0 +1,131 @@ +/*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512CDINTRIN_H +#define __AVX512CDINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_conflict_epi64 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_conflict_epi32 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_lzcnt_epi32 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_lzcnt_epi32 
(__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_lzcnt_epi64 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h index cfcfc62b9455..8a69f7ffbf34 100644 --- a/lib/Headers/avx512dqintrin.h +++ b/lib/Headers/avx512dqintrin.h @@ -29,14 +29,14 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8di) __A * (__v8di) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, (__v8di) __B, @@ -44,7 +44,7 @@ _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__mmask8) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, (__v8di) __B, @@ -53,12 +53,12 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { (__mmask8) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_xor_pd (__m512d __A, __m512d __B) { return (__m512d) ((__v8di) __A ^ (__v8di) __B); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, (__v8df) __B, @@ -66,7 +66,7 @@ _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, (__v8df) __B, @@ -75,12 +75,12 @@ _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512 
DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512) ((__v16si) __A ^ (__v16si) __B); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -88,7 +88,7 @@ _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -97,12 +97,12 @@ _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_or_pd (__m512d __A, __m512d __B) { return (__m512d) ((__v8di) __A | (__v8di) __B); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, (__v8df) __B, @@ -110,7 +110,7 @@ _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, (__v8df) __B, @@ -119,12 +119,12 @@ _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_or_ps (__m512 __A, __m512 __B) { return (__m512) ((__v16si) __A | (__v16si) __B); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_or_ps (__m512 __W, __mmask16 
__U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -132,7 +132,7 @@ _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -141,12 +141,12 @@ _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_and_pd (__m512d __A, __m512d __B) { return (__m512d) ((__v8di) __A & (__v8di) __B); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, (__v8df) __B, @@ -154,7 +154,7 @@ _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, (__v8df) __B, @@ -163,12 +163,12 @@ _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_and_ps (__m512 __A, __m512 __B) { return (__m512) ((__v16si) __A & (__v16si) __B); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -176,7 +176,7 @@ _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static 
__inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -185,7 +185,7 @@ _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_andnot_pd (__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, (__v8df) __B, @@ -194,7 +194,7 @@ _mm512_andnot_pd (__m512d __A, __m512d __B) { (__mmask8) -1); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, (__v8df) __B, @@ -202,7 +202,7 @@ _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, (__v8df) __B, @@ -211,7 +211,7 @@ _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { (__mmask8) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_andnot_ps (__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -220,7 +220,7 @@ _mm512_andnot_ps (__m512 __A, __m512 __B) { (__mmask16) -1); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -228,7 +228,7 @@ _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 
__DEFAULT_FN_ATTRS _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, (__v16sf) __B, @@ -237,6 +237,6 @@ _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { (__mmask16) __U); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 98eb73b3113f..099114453646 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -47,17 +47,17 @@ typedef unsigned short __mmask16; #define _MM_FROUND_CUR_DIRECTION 0x04 /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) /* Create vectors with repeated elements */ -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void) { return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, @@ -66,7 +66,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, int __A) __M); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { #ifdef __x86_64__ @@ -82,45 +82,45 @@ _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) #endif } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void) { return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } -static __inline __m512 
DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set1_ps(float __w) { return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set1_pd(double __w) { return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s) { return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi64(long long __d) { return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcastss_ps(__m128 __X) { float __f = __X[0]; @@ -130,7 +130,7 @@ _mm512_broadcastss_ps(__m128 __X) __f, __f, __f, __f }; } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __X) { double __d = __X[0]; @@ -140,39 +140,39 @@ _mm512_broadcastsd_pd(__m128d __X) /* Cast between vector types */ -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castpd256_pd512(__m256d __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castps256_ps512(__m256 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1); } -static __inline __m128d DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS _mm512_castpd512_pd128(__m512d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 
1, 2, 3); } /* Bitwise operators */ -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) { return __a & __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, @@ -180,7 +180,7 @@ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) (__v16si) __src, (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, @@ -190,13 +190,13 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi64(__m512i __a, __m512i __b) { return __a & __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, @@ -204,7 +204,7 @@ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) (__v8di) __src, (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, @@ -214,7 +214,7 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, @@ -224,7 +224,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512i __B) (__mmask16) 
-1); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, @@ -233,7 +233,7 @@ _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, @@ -243,7 +243,7 @@ _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi64 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, @@ -253,7 +253,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512i __B) (__mmask8) -1); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, @@ -261,7 +261,7 @@ _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di) __W, __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, @@ -270,13 +270,13 @@ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) _mm512_setzero_pd (), __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi32(__m512i __a, __m512i __b) { return __a | __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { 
return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, @@ -284,7 +284,7 @@ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) (__v16si) __src, (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, @@ -294,13 +294,13 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi64(__m512i __a, __m512i __b) { return __a | __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, @@ -308,7 +308,7 @@ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) (__v8di) __src, (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, @@ -318,13 +318,13 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi32(__m512i __a, __m512i __b) { return __a ^ __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, @@ -332,7 +332,7 @@ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) (__v16si) __src, (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi32(__mmask16 __k, 
__m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, @@ -342,13 +342,13 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) (__mmask16) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi64(__m512i __a, __m512i __b) { return __a ^ __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, @@ -356,7 +356,7 @@ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) (__v8di) __src, (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, @@ -366,68 +366,68 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) (__mmask8) __k); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_si512(__m512i __a, __m512i __b) { return __a & __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_si512(__m512i __a, __m512i __b) { return __a | __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_si512(__m512i __a, __m512i __b) { return __a ^ __b; } /* Arithmetic */ -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_add_pd(__m512d __a, __m512d __b) { return __a + __b; } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_add_ps(__m512 __a, __m512 __b) { return __a + __b; } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mul_pd(__m512d __a, __m512d __b) { return __a * __b; } -static __inline __m512 DEFAULT_FN_ATTRS 
+static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mul_ps(__m512 __a, __m512 __b) { return __a * __b; } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_sub_pd(__m512d __a, __m512d __b) { return __a - __b; } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_sub_ps(__m512 __a, __m512 __b) { return __a - __b; } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8di) __A + (__v8di) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, @@ -436,7 +436,7 @@ _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__mmask8) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, @@ -446,13 +446,13 @@ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__mmask8) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8di) __A - (__v8di) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, @@ -461,7 +461,7 @@ _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__mmask8) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, @@ 
-471,13 +471,13 @@ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__mmask8) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16si) __A + (__v16si) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, @@ -486,7 +486,7 @@ _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, @@ -496,13 +496,13 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16si) __A - (__v16si) __B); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, @@ -511,7 +511,7 @@ _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512i DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, @@ -521,7 +521,7 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__mmask16) __U); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) 
__builtin_ia32_maxpd512_mask ((__v8df) __A, @@ -532,7 +532,7 @@ _mm512_max_pd(__m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, @@ -544,7 +544,7 @@ _mm512_max_ps(__m512 __A, __m512 __B) } static __inline __m512i -DEFAULT_FN_ATTRS +__DEFAULT_FN_ATTRS _mm512_max_epi32(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, @@ -554,7 +554,7 @@ _mm512_max_epi32(__m512i __A, __m512i __B) (__mmask16) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu32(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, @@ -564,7 +564,7 @@ _mm512_max_epu32(__m512i __A, __m512i __B) (__mmask16) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi64(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, @@ -574,7 +574,7 @@ _mm512_max_epi64(__m512i __A, __m512i __B) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu64(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, @@ -584,7 +584,7 @@ _mm512_max_epu64(__m512i __A, __m512i __B) (__mmask8) -1); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_min_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, @@ -595,7 +595,7 @@ _mm512_min_pd(__m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_min_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, @@ -607,7 +607,7 @@ _mm512_min_ps(__m512 __A, __m512 __B) } static 
__inline __m512i -DEFAULT_FN_ATTRS +__DEFAULT_FN_ATTRS _mm512_min_epi32(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, @@ -617,7 +617,7 @@ _mm512_min_epi32(__m512i __A, __m512i __B) (__mmask16) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu32(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, @@ -627,7 +627,7 @@ _mm512_min_epu32(__m512i __A, __m512i __B) (__mmask16) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi64(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, @@ -637,7 +637,7 @@ _mm512_min_epi64(__m512i __A, __m512i __B) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu64(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, @@ -647,7 +647,7 @@ _mm512_min_epu64(__m512i __A, __m512i __B) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epi32(__m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, @@ -657,7 +657,7 @@ _mm512_mul_epi32(__m512i __X, __m512i __Y) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, @@ -665,7 +665,7 @@ _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) (__v8di) __W, __M); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, @@ -675,7 +675,7 @@ _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) __M); } 
-static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epu32(__m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, @@ -685,7 +685,7 @@ _mm512_mul_epu32(__m512i __X, __m512i __Y) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, @@ -693,7 +693,7 @@ _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) (__v8di) __W, __M); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, @@ -703,13 +703,13 @@ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) __M); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16si) __A * (__v16si) __B); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, @@ -719,7 +719,7 @@ _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) __M); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, @@ -727,7 +727,7 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) (__v16si) __W, __M); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_sqrt_pd(__m512d a) { return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a, @@ -736,7 +736,7 @@ 
_mm512_sqrt_pd(__m512d a) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_sqrt_ps(__m512 a) { return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a, @@ -745,7 +745,7 @@ _mm512_sqrt_ps(__m512 a) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rsqrt14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, @@ -753,7 +753,7 @@ _mm512_rsqrt14_pd(__m512d __A) _mm512_setzero_pd (), (__mmask8) -1);} -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rsqrt14_ps(__m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, @@ -762,7 +762,7 @@ _mm512_rsqrt14_ps(__m512 __A) (__mmask16) -1); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -772,7 +772,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B) (__mmask8) -1); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, @@ -782,7 +782,7 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B) (__mmask8) -1); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rcp14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, @@ -791,7 +791,7 @@ _mm512_rcp14_pd(__m512d __A) (__mmask8) -1); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rcp14_ps(__m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, @@ -799,7 +799,7 @@ _mm512_rcp14_ps(__m512 __A) _mm512_setzero_ps (), (__mmask16) -1); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS 
_mm_rcp14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -809,7 +809,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B) (__mmask8) -1); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, @@ -819,7 +819,7 @@ _mm_rcp14_sd(__m128d __A, __m128d __B) (__mmask8) -1); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_floor_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -828,7 +828,7 @@ _mm512_floor_ps(__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_floor_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -837,7 +837,7 @@ _mm512_floor_pd(__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_ceil_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -846,7 +846,7 @@ _mm512_ceil_ps(__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_ceil_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -855,7 +855,7 @@ _mm512_ceil_pd(__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi64(__m512i __A) { return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, @@ -864,7 +864,7 @@ _mm512_abs_epi64(__m512i __A) (__mmask8) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi32(__m512i __A) { return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, @@ -881,75 +881,779 @@ _mm512_abs_epi32(__m512i __A) (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), 
(__v8df)(A), \ -1, _MM_FROUND_CUR_DIRECTION); }) -static __inline__ __m512d DEFAULT_FN_ATTRS +#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + 
(__mmask8) (U), (R)); }) + + +#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) - __builtin_ia32_vfmaddpd512_mask(__A, - __B, - __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) - __builtin_ia32_vfmsubpd512_mask(__A, - __B, - __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + 
_MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) - __builtin_ia32_vfnmaddpd512_mask(__A, - __B, - __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) 
__A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) 
__builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - return (__m512) - __builtin_ia32_vfmaddps512_mask(__A, - __B, - __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - return (__m512) - __builtin_ia32_vfmsubps512_mask(__A, - __B, - __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) 
__builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - return (__m512) - __builtin_ia32_vfnmaddps512_mask(__A, - __B, - __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 
__A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) -1, (R)); }) + + +#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ + (__v8df) (B), -(__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) 
__builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ + 
(__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) -1, (R)); }) + + +#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ + (__v16sf) (B), -(__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + 
(__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + 
(__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ + (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) 
__extension__ ({ \ + (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \ + (__v8df) (B), (__v8df) (C), \ + (__mmask8) (U), (R)); }) + + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ + (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \ + (__v16sf) (B), (__v16sf) (C), \ + (__mmask16) (U), (R)); }) + + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + + + /* Vector permutations */ -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I @@ -958,7 +1662,7 @@ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) (__v16si) 
__B, (__mmask16) -1); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) { return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I @@ -968,7 +1672,7 @@ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) (__mmask8) -1); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) { return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I @@ -977,7 +1681,7 @@ _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) (__v8df) __B, (__mmask8) -1); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) { return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I @@ -1019,7 +1723,7 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) /* Vector Blend */ -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, @@ -1027,7 +1731,7 @@ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) (__mmask8) __U); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, @@ -1035,7 +1739,7 @@ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) (__mmask16) __U); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, @@ -1043,7 +1747,7 @@ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) (__mmask8) __U); } -static __inline __m512i DEFAULT_FN_ATTRS +static 
__inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, @@ -1087,7 +1791,7 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Conversion */ -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -1107,7 +1811,7 @@ _mm512_cvttps_epu32(__m512 __A) (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (R)); }) -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32_pd(__m256i __A) { return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, @@ -1116,7 +1820,7 @@ _mm512_cvtepi32_pd(__m256i __A) (__mmask8) -1); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu32_pd(__m256i __A) { return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, @@ -1135,7 +1839,7 @@ _mm512_cvtepu32_pd(__m256i __A) (__v16hi)_mm256_setzero_si256(), \ -1); }) -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -1145,7 +1849,7 @@ _mm512_cvtph_ps(__m256i __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi32(__m512 a) { return (__m512i) @@ -1154,7 +1858,7 @@ _mm512_cvttps_epi32(__m512 a) (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi32(__m512d a) { return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a, @@ -1194,19 +1898,19 @@ _mm512_cvttpd_epi32(__m512d a) (__mmask8) -1, (R)); }) /* Unpack and Interleave */ -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpackhi_pd(__m512d 
__a, __m512d __b) { return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpacklo_pd(__m512d __a, __m512d __b) { return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpackhi_ps(__m512 __a, __m512 __b) { return __builtin_shufflevector(__a, __b, @@ -1216,7 +1920,7 @@ _mm512_unpackhi_ps(__m512 __a, __m512 __b) 2+12, 18+12, 3+12, 19+12); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpacklo_ps(__m512 __a, __m512 __b) { return __builtin_shufflevector(__a, __b, @@ -1228,7 +1932,7 @@ _mm512_unpacklo_ps(__m512 __a, __m512 __b) /* Bit Test */ -static __inline __mmask16 DEFAULT_FN_ATTRS +static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_test_epi32_mask(__m512i __A, __m512i __B) { return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, @@ -1236,7 +1940,7 @@ _mm512_test_epi32_mask(__m512i __A, __m512i __B) (__mmask16) -1); } -static __inline __mmask8 DEFAULT_FN_ATTRS +static __inline __mmask8 __DEFAULT_FN_ATTRS _mm512_test_epi64_mask(__m512i __A, __m512i __B) { return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, @@ -1246,7 +1950,7 @@ _mm512_test_epi64_mask(__m512i __A, __m512i __B) /* SIMD load ops */ -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, @@ -1255,7 +1959,7 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512i DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, @@ -1264,7 +1968,7 @@ _mm512_maskz_loadu_epi64(__mmask8 
__U, void const *__P) (__mmask8) __U); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, @@ -1273,7 +1977,7 @@ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, @@ -1282,7 +1986,7 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_load_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, @@ -1291,7 +1995,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, @@ -1300,7 +2004,7 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_loadu_pd(double const *__p) { struct __loadu_pd { @@ -1309,7 +2013,7 @@ _mm512_loadu_pd(double const *__p) return ((struct __loadu_pd*)__p)->__v; } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_loadu_ps(float const *__p) { struct __loadu_ps { @@ -1318,7 +2022,7 @@ _mm512_loadu_ps(float const *__p) return ((struct __loadu_ps*)__p)->__v; } -static __inline __m512 DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS _mm512_load_ps(double const *__p) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, @@ -1327,7 +2031,7 @@ _mm512_load_ps(double const *__p) (__mmask16) -1); 
} -static __inline __m512d DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS _mm512_load_pd(float const *__p) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, @@ -1338,65 +2042,65 @@ _mm512_load_pd(float const *__p) /* SIMD store ops */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, (__mmask8) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, (__mmask16) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_pd(void *__P, __m512d __A) { __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, (__mmask16) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_ps(void *__P, __m512 __A) { __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_store_pd(void *__P, __m512d __A) { *(__m512d*)__P = __A; } -static __inline void DEFAULT_FN_ATTRS +static 
__inline void __DEFAULT_FN_ATTRS _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, (__mmask16) __U); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm512_store_ps(void *__P, __m512 __A) { *(__m512*)__P = __A; @@ -1404,7 +2108,7 @@ _mm512_store_ps(void *__P, __m512 __A) /* Mask ops */ -static __inline __mmask16 DEFAULT_FN_ATTRS +static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); @@ -1412,289 +2116,289 @@ _mm512_knot(__mmask16 __M) /* Integer compare */ -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return 
(__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, 
(__v8di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, __u); } -static __inline__ 
__mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ 
__mmask8 __DEFAULT_FN_ATTRS _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS 
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, __u); @@ -1748,6 +2452,6 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ (__mmask8)(m)); }) -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif // __AVX512FINTRIN_H diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h index 1fbffd42cf0c..59849e43fd32 100644 --- a/lib/Headers/avx512vlbwintrin.h +++ b/lib/Headers/avx512vlbwintrin.h @@ -29,587 +29,587 @@ #define __AVX512VLBWINTRIN_H /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) /* Integer compare */ -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 
__DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu16_mask(__mmask8 __u, 
__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpge_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpge_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return 
(__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 
5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, __u); } -static __inline__ __mmask32 
DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS 
_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmple_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmple_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmple_epi8_mask(__m256i __a, __m256i 
__b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmple_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmple_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, 
(__v16hi)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmple_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmplt_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmplt_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, (__mmask32)-1); } -static 
__inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 
__DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS 
_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, __u); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, (__mmask32)-1); } -static __inline__ __mmask32 DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, 
__m256i __b) { return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, __u); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, (__mmask16)-1); } -static __inline__ __mmask16 DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, __u); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A, (__v32qi) __B, @@ -617,7 +617,7 @@ _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ (__mmask32) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A, (__v32qi) __B, @@ -626,7 +626,7 @@ _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { (__mmask32) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -634,7 +634,7 @@ _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { (__mmask16) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -643,7 +643,7 @@ _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { (__mmask16) __U); 
} -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A, (__v32qi) __B, @@ -651,7 +651,7 @@ _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { (__mmask32) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A, (__v32qi) __B, @@ -660,7 +660,7 @@ _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { (__mmask32) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -668,7 +668,7 @@ _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { (__mmask16) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -676,7 +676,7 @@ _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { _mm256_setzero_si256 (), (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A, (__v16qi) __B, @@ -684,7 +684,7 @@ _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddb128_mask 
((__v16qi) __A, (__v16qi) __B, @@ -693,7 +693,7 @@ _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -701,7 +701,7 @@ _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -710,7 +710,7 @@ _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A, (__v16qi) __B, @@ -718,7 +718,7 @@ _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A, (__v16qi) __B, @@ -727,7 +727,7 @@ _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -735,7 +735,7 @@ _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, 
__m128i __B) { return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -744,7 +744,7 @@ _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -752,7 +752,7 @@ _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { (__mmask16) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A, (__v16hi) __B, @@ -761,7 +761,7 @@ _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { (__mmask16) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -769,7 +769,7 @@ _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A, (__v8hi) __B, @@ -857,6 +857,6 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { (__v16hi)(__m256i)(b), \ (p), (__mmask16)(m)); }) -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __AVX512VLBWINTRIN_H */ diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h index 2a32edd1ad81..032070bfb902 100644 --- a/lib/Headers/avx512vldqintrin.h +++ b/lib/Headers/avx512vldqintrin.h @@ -29,14 +29,14 @@ #define __AVX512VLDQINTRIN_H /* Define the default attributes for the 
functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4di) __A * (__v4di) __B); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, (__v4di) __B, @@ -44,7 +44,7 @@ _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, (__v4di) __B, @@ -53,12 +53,12 @@ _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2di) __A * (__v2di) __B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, (__v2di) __B, @@ -66,7 +66,7 @@ _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, (__v2di) __B, @@ -75,7 +75,7 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 
(__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, (__v4df) __B, @@ -83,7 +83,7 @@ _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, (__v4df) __B, @@ -92,7 +92,7 @@ _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, (__v2df) __B, @@ -100,7 +100,7 @@ _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, (__v2df) __B, @@ -109,7 +109,7 @@ _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -117,7 +117,7 @@ _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -126,7 +126,7 @@ 
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -134,7 +134,7 @@ _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -143,7 +143,7 @@ _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, (__v4df) __B, @@ -151,7 +151,7 @@ _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, (__v4df) __B, @@ -160,7 +160,7 @@ _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, (__v2df) __B, @@ -168,7 +168,7 @@ _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 
(__v2df) __B, @@ -177,7 +177,7 @@ _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -185,7 +185,7 @@ _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -194,7 +194,7 @@ _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -202,7 +202,7 @@ _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -211,7 +211,7 @@ _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, @@ -220,7 +220,7 @@ _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, (__v4df) __B, @@ -229,7 
+229,7 @@ _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, (__v2df) __B, @@ -237,7 +237,7 @@ _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, (__v2df) __B, @@ -246,7 +246,7 @@ _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -254,7 +254,7 @@ _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -263,7 +263,7 @@ _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -271,7 +271,7 @@ _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, (__v4sf) __B, @@ 
-280,7 +280,7 @@ _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, (__v4df) __B, @@ -288,7 +288,7 @@ _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, (__v4df) __B, @@ -297,7 +297,7 @@ _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, (__v2df) __B, @@ -305,7 +305,7 @@ _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, (__v2df) __B, @@ -314,7 +314,7 @@ _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -322,7 +322,7 @@ _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, (__v8sf) 
__B, @@ -331,7 +331,7 @@ _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -339,7 +339,7 @@ _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -348,6 +348,6 @@ _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { (__mmask8) __U); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 59ff5ebfd574..73f46d101a55 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -29,198 +29,198 @@ #define __AVX512VLINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) /* Integer compare */ -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS 
_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return 
(__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, __u); } -static 
__inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, __u); @@ -229,391 +229,391 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi32_mask(__m128i __a, __m128i __b) { return 
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, (__mmask8)-1); } 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, __u); } 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 
__DEFAULT_FN_ATTRS _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, __u); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, (__mmask8)-1); } -static __inline__ __mmask8 DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, __u); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -623,7 +623,7 @@ _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, @@ -633,7 +633,7 @@ _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m256i 
DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -643,7 +643,7 @@ _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, @@ -653,7 +653,7 @@ _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -663,7 +663,7 @@ _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, @@ -673,7 +673,7 @@ _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -683,7 +683,7 @@ _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, @@ -693,7 +693,7 @@ _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -703,7 +703,7 @@ _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) 
__U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, @@ -713,7 +713,7 @@ _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -723,7 +723,7 @@ _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, @@ -733,7 +733,7 @@ _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -743,7 +743,7 @@ _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, @@ -753,7 +753,7 @@ _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -763,7 +763,7 @@ _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, @@ -773,7 +773,7 @@ _mm_maskz_sub_epi64 
(__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -782,7 +782,7 @@ _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, (__v4di) __W, __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, @@ -792,7 +792,7 @@ _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { @@ -801,7 +801,7 @@ _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, (__v2di) __W, __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, @@ -811,7 +811,7 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -820,7 +820,7 @@ _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, (__v4di) __W, __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, @@ -830,7 +830,7 @@ _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { @@ -839,7 +839,7 @@ 
_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, (__v2di) __W, __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, @@ -849,7 +849,7 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, @@ -859,7 +859,7 @@ _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { @@ -868,7 +868,7 @@ _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, (__v8si) __W, __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, @@ -878,7 +878,7 @@ _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) __M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { @@ -887,7 +887,7 @@ _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A, (__v4si) __W, __M); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -897,7 +897,7 @@ _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return 
(__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A, @@ -907,7 +907,7 @@ _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A, @@ -916,7 +916,7 @@ _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A, @@ -926,7 +926,7 @@ _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -936,7 +936,7 @@ _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A, @@ -946,7 +946,7 @@ _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -956,7 +956,7 @@ _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A, @@ -966,7 +966,7 @@ _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) 
__U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -976,7 +976,7 @@ _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A, @@ -986,7 +986,7 @@ _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, @@ -995,7 +995,7 @@ _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, @@ -1005,7 +1005,7 @@ _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -1015,7 +1015,7 @@ _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A, @@ -1025,7 +1025,7 @@ _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 
@@ -1035,7 +1035,7 @@ _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A, @@ -1045,7 +1045,7 @@ _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -1054,7 +1054,7 @@ _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__v4di) __W, __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A, @@ -1064,7 +1064,7 @@ _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -1073,7 +1073,7 @@ _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A, (__v2di) __W, __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A, @@ -1083,7 +1083,7 @@ _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -1092,7 +1092,7 @@ _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__v4di) __W, __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i 
__A, __m256i __B) { return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A, @@ -1102,7 +1102,7 @@ _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -1111,7 +1111,7 @@ _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, (__v2di) __W, __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A, @@ -1121,7 +1121,7 @@ _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -1131,7 +1131,7 @@ _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A, @@ -1141,7 +1141,7 @@ _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, @@ -1150,7 +1150,7 @@ _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, @@ -1160,7 +1160,7 @@ _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__mmask8) 
__U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -1170,7 +1170,7 @@ _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A, (__mmask8) __U); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A, @@ -1180,7 +1180,7 @@ _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -1190,7 +1190,7 @@ _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A, (__mmask8) __U); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A, @@ -1320,6 +1320,663 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__v2df)(__m128)(b), \ (p), (__mmask8)(m)); }) -#undef DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d 
__DEFAULT_FN_ATTRS +_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS 
+_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmsub_ps(__m128 __A, 
__mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) 
__builtin_ia32_vfmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) 
__builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, + (__v2df) __B, + -(__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, + (__v4df) __B, + -(__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ 
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, + (__v4sf) __B, + -(__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, + __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) +{ + return (__m256) 
__builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, + (__v8sf) __B, + -(__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) + __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) + __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) 
__builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) +{ + return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) +{ + return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) +{ + return (__m256d) 
__builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) +{ + return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) +{ + return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) +{ + return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); +} + +#undef __DEFAULT_FN_ATTRS #endif /* __AVX512VLINTRIN_H */ diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index d28a9155266a..6a9972b65bb0 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -40,112 +40,112 @@ typedef double __m256d __attribute__((__vector_size__(32))); typedef long long __m256i __attribute__((__vector_size__(32))); /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) /* Arithmetic */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b) { return __a+__b; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b) { return __a+__b; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b) { return __a-__b; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b) { return __a-__b; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b) { return __a / __b; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b) { return __a / __b; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b) { 
return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b) { return __a * __b; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b) { return __a * __b; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps(__m256 __a) { return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); @@ -165,99 +165,99 @@ _mm256_rcp_ps(__m256 __a) #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) /* Logical */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4di)__a & (__v4di)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8si)__a & (__v8si)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4di)__a & (__v4di)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps(__m256 __a, 
__m256 __b) { return (__m256)(~(__v8si)__a & (__v8si)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4di)__a | (__v4di)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8si)__a | (__v8si)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4di)__a ^ (__v4di)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8si)__a ^ (__v8si)__b); } /* Horizontal arithmetic */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } /* Vector permutations */ -static __inline __m128d DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, 
(__v4di)__c); } -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); @@ -329,14 +329,14 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) (((M) & 0x40) ? 14 : 6), \ (((M) & 0x80) ? 15 : 7)); }) -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) { return (__m256d)__builtin_ia32_blendvpd256( (__v4df)__a, (__v4df)__b, (__v4df)__c); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) { return (__m256)__builtin_ia32_blendvps256( @@ -432,21 +432,21 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) __m128 __b = (b); \ (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); }) -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi32(__m256i __a, const int __imm) { __v8si __b = (__v8si)__a; return __b[__imm & 7]; } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; return __b[__imm & 15]; } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; @@ -454,7 +454,7 @@ _mm256_extract_epi8(__m256i __a, const int __imm) } #ifdef __x86_64__ -static __inline long long DEFAULT_FN_ATTRS +static __inline long long __DEFAULT_FN_ATTRS _mm256_extract_epi64(__m256i __a, const int __imm) { __v4di __b = (__v4di)__a; @@ -462,7 +462,7 @@ _mm256_extract_epi64(__m256i __a, const int __imm) } #endif -static __inline 
__m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi32(__m256i __a, int __b, int const __imm) { __v8si __c = (__v8si)__a; @@ -470,7 +470,7 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm) return (__m256i)__c; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi16(__m256i __a, int __b, int const __imm) { __v16hi __c = (__v16hi)__a; @@ -478,7 +478,7 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm) return (__m256i)__c; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi8(__m256i __a, int __b, int const __imm) { __v32qi __c = (__v32qi)__a; @@ -487,7 +487,7 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm) } #ifdef __x86_64__ -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi64(__m256i __a, long long __b, int const __imm) { __v4di __c = (__v4di)__a; @@ -497,263 +497,263 @@ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm) #endif /* Conversion */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a) { return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); } -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a) { return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a) { return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); } -static 
__inline __m128i DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } -static __inline __m128i DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } /* Vector replicate */ -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a) { return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a) { return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a) { return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); } /* Unpack and Interleave */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } /* Bit Test */ -static __inline int DEFAULT_FN_ATTRS +static 
__inline int __DEFAULT_FN_ATTRS _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } -static __inline int DEFAULT_FN_ATTRS 
+static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } /* Vector extract sign mask */ -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } -static __inline int DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } /* Vector __zero */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_zeroall(void) { __builtin_ia32_vzeroall(); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); } /* Vector load with broadcast */ -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm_broadcast_ss(float const *__a) { float __f = *__a; return (__m128)(__v4sf){ __f, __f, __f, __f }; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd(double const *__a) { double __d = *__a; return (__m256d)(__v4df){ __d, __d, __d, __d }; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss(float const *__a) { float __f = *__a; return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; } -static __inline 
__m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a); } /* SIMD load ops */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd(double const *__p) { return *(__m256d *)__p; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps(float const *__p) { return *(__m256 *)__p; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd(double const *__p) { struct __loadu_pd { @@ -762,7 +762,7 @@ _mm256_loadu_pd(double const *__p) return ((struct __loadu_pd*)__p)->__v; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps(float const *__p) { struct __loadu_ps { @@ -771,13 +771,13 @@ _mm256_loadu_ps(float const *__p) return ((struct __loadu_ps*)__p)->__v; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256(__m256i const *__p) { return *__p; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu_si256(__m256i const *__p) { struct __loadu_si256 { @@ -786,141 +786,141 @@ _mm256_loadu_si256(__m256i const *__p) return ((struct __loadu_si256*)__p)->__v; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_lddqu_si256(__m256i const *__p) { return (__m256i)__builtin_ia32_lddqu256((char const *)__p); } /* SIMD store ops */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_store_pd(double *__p, __m256d __a) { *(__m256d *)__p = __a; } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS 
_mm256_store_ps(float *__p, __m256 __a) { *(__m256 *)__p = __a; } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd(double *__p, __m256d __a) { __builtin_ia32_storeupd256(__p, (__v4df)__a); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps(float *__p, __m256 __a) { __builtin_ia32_storeups256(__p, (__v8sf)__a); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256(__m256i *__p, __m256i __a) { *__p = __a; } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256(__m256i *__p, __m256i __a) { __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a); } /* Conditional load ops */ -static __inline __m128d DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS _mm_maskload_pd(double const *__p, __m128d __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd(double const *__p, __m256d __m) { return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, (__v4df)__m); } -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm_maskload_ps(float const *__p, __m128 __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps(float const *__p, __m256 __m) { return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m); } /* Conditional store ops */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a) { __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_pd(double *__p, __m128d __m, __m128d 
__a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a) { __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_ps(float *__p, __m128 __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a); } /* Cacheability support ops */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256(__m256i *__a, __m256i __b) { __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd(double *__a, __m256d __b) { __builtin_ia32_movntpd256(__a, (__v4df)__b); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps(float *__p, __m256 __a) { __builtin_ia32_movntps256(__p, (__v8sf)__a); } /* Create vectors */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { return (__m256d){ __d, __c, __b, __a }; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short 
__w07, short __w06, short __w05, short __w04, @@ -930,7 +930,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -948,34 +948,34 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return (__m256i)(__v4di){ __d, __c, __b, __a }; } /* Create vectors with elements in reverse order */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd(double __a, double __b, double __c, double __d) { return (__m256d){ __a, __b, __c, __d }; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -985,7 +985,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i 
__DEFAULT_FN_ATTRS _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -1002,39 +1002,39 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return (__m256i)(__v4di){ __a, __b, __c, __d }; } /* Create vectors with repeated elements */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd(double __w) { return (__m256d){ __w, __w, __w, __w }; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps(float __w) { return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i) { return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w) { return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b) { return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, @@ -1042,99 +1042,99 @@ _mm256_set1_epi8(char __b) __b, __b, __b, __b, __b, __b, __b }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q) { return (__m256i)(__v4di){ __q, __q, __q, __q }; } /* Create __zeroed vectors */ -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void) { return (__m256d){ 0, 0, 0, 0 }; } -static __inline __m256 
DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void) { return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void) { return (__m256i){ 0LL, 0LL, 0LL, 0LL }; } /* Cast between vector types */ -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps(__m256d __a) { return (__m256)__a; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd(__m256 __a) { return (__m256d)__a; } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256(__m256 __a) { return (__m256i)__a; } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; } -static __inline __m128d DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } -static __inline __m128i DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a) { return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a) { 
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a) { return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); @@ -1205,7 +1205,7 @@ _mm256_castsi128_si256(__m128i __a) (((M) & 1) ? 3 : 1) );}) /* SIMD load ops (unaligned) */ -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) { struct __loadu_ps { @@ -1216,7 +1216,7 @@ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) { struct __loadu_pd { @@ -1227,7 +1227,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) { struct __loadu_si128 { @@ -1240,7 +1240,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) } /* SIMD store ops (unaligned) */ -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) { __m128 __v128; @@ -1251,7 +1251,7 @@ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) __builtin_ia32_storeups(__addr_hi, __v128); } -static __inline void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) { __m128d __v128; @@ -1262,7 +1262,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) __builtin_ia32_storeupd(__addr_hi, __v128); } -static __inline 
void DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) { __m128i __v128; @@ -1273,36 +1273,36 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 (__m128 __hi, __m128 __lo) { return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo) { return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo) { return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 (__m128 __lo, __m128 __hi) { return _mm256_set_m128(__hi, __lo); } -static __inline __m256d DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi) { return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); } -static __inline __m256i DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi) { return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __AVXINTRIN_H */ diff --git a/lib/Headers/bmi2intrin.h b/lib/Headers/bmi2intrin.h index 7818934e17c2..fdae82cf2ba7 100644 --- a/lib/Headers/bmi2intrin.h +++ b/lib/Headers/bmi2intrin.h @@ -29,21 +29,21 @@ #define __BMI2INTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _bzhi_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bzhi_si(__X, __Y); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _pdep_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pdep_si(__X, __Y); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _pext_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pext_si(__X, __Y); @@ -51,25 +51,25 @@ _pext_u32(unsigned int __X, unsigned int __Y) #ifdef __x86_64__ -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bzhi_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bzhi_di(__X, __Y); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pdep_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pdep_di(__X, __Y); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pext_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pext_di(__X, __Y); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mulx_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long *__P) { @@ -80,7 +80,7 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y, #else /* !__x86_64__ */ -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) { unsigned long long __res = (unsigned 
long long) __X * __Y; @@ -90,6 +90,6 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) #endif /* !__x86_64__ */ -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __BMI2INTRIN_H */ diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h index 0aad8f24ea00..dc2f83f3e2d1 100644 --- a/lib/Headers/bmiintrin.h +++ b/lib/Headers/bmiintrin.h @@ -37,53 +37,53 @@ #define _tzcnt_u32(a) (__tzcnt_u32((a))) /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS __tzcnt_u16(unsigned short __X) { return __X ? __builtin_ctzs(__X) : 16; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __andn_u32(unsigned int __X, unsigned int __Y) { return ~__X & __Y; } /* AMD-specified, double-leading-underscore version of BEXTR */ -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __bextr_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsi_u32(unsigned int __X) { return __X & -__X; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsmsk_u32(unsigned int __X) { return __X ^ (__X - 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned 
int __DEFAULT_FN_ATTRS __blsr_u32(unsigned int __X) { return __X & (__X - 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __X ? __builtin_ctz(__X) : 32; @@ -98,45 +98,45 @@ __tzcnt_u32(unsigned int __X) #define _blsr_u64(a) (__blsr_u64((a))) #define _tzcnt_u64(a) (__tzcnt_u64((a))) -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } /* AMD-specified, double-leading-underscore version of BEXTR */ -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __bextr_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsi_u64(unsigned long long __X) { return __X & -__X; } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsmsk_u64(unsigned long long __X) { return __X ^ (__X - 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsr_u64(unsigned long long __X) { return __X & (__X - 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __X ? 
__builtin_ctzll(__X) : 64; @@ -144,6 +144,6 @@ __tzcnt_u64(unsigned long long __X) #endif /* __x86_64__ */ -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __BMIINTRIN_H */ diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index eee2428d41a0..e22ffaf19240 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -36,435 +36,435 @@ typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b) { __a[0] += __b[0]; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b) { return __a + __b; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b) { __a[0] -= __b[0]; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b) { return __a - __b; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b) { __a[0] *= __b[0]; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b) { return __a * __b; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b) { __a[0] /= __b[0]; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b) { return __a / __b; } -static 
__inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_sqrtsd(__b); return (__m128d) { __c[0], __a[1] }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd(__a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b) { return __builtin_ia32_minsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b) { return __builtin_ia32_minpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b) { return __builtin_ia32_maxsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b) { return __builtin_ia32_maxpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b) { return (__m128d)((__v4si)__a & (__v4si)__b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b) { return (__m128d)(~(__v4si)__a & (__v4si)__b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b) { return (__m128d)((__v4si)__a | (__v4si)__b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b) { return (__m128d)((__v4si)__a ^ (__v4si)__b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d 
__DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd(__b, __a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd(__b, __a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd(__b, __a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); } -static __inline__ __m128d 
DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplesd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmplesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d 
__a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdeq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdlt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdle(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdgt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdge(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdneq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdeq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdlt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdle(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b) { return 
__builtin_ia32_ucomisdgt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdge(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdneq(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { return __builtin_ia32_cvtpd2ps(__a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { return __builtin_ia32_cvtps2pd(__a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { return __builtin_ia32_cvtdq2pd((__v4si)__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { return __builtin_ia32_cvtpd2dq(__a); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { return __builtin_ia32_cvtsd2si(__a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b) { __a[0] = __b[0]; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b) { __a[0] = __b; return __a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b) { __a[0] = __b[0]; return __a; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { return (__m128i)__builtin_ia32_cvttpd2dq(__a); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { return __a[0]; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static 
__inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvtpd2pi(__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvttpd2pi(__a); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a) { return __builtin_ia32_cvtpi2pd((__v2si)__a); } -static __inline__ double DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { return __a[0]; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { return *(__m128d*)__dp; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { struct __mm_load1_pd_struct { @@ -476,14 +476,14 @@ _mm_load1_pd(double const *__dp) #define _mm_load_pd1(dp) _mm_load1_pd(dp) -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { __m128d __u = *(__m128d*)__dp; return __builtin_shufflevector(__u, __u, 1, 0); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { struct __loadu_pd { @@ -492,7 +492,7 @@ _mm_loadu_pd(double const *__dp) return ((struct __loadu_pd*)__dp)->__v; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { struct __mm_load_sd_struct { @@ -502,7 +502,7 @@ _mm_load_sd(double const *__dp) return (__m128d){ __u, 0 }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp) { struct __mm_loadh_pd_struct { @@ -512,7 +512,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp) return (__m128d){ __a[0], __u }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d 
__DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp) { struct __mm_loadl_pd_struct { @@ -522,43 +522,43 @@ _mm_loadl_pd(__m128d __a, double const *__dp) return (__m128d){ __u, __a[1] }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { return (__m128d){ __w, 0 }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { return (__m128d){ __w, __w }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x) { return (__m128d){ __x, __w }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x) { return (__m128d){ __w, __x }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { return (__m128d){ 0, 0 }; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b) { return (__m128d){ __b[0], __a[1] }; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a) { struct __mm_store_sd_struct { @@ -567,7 +567,7 @@ _mm_store_sd(double *__dp, __m128d __a) ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { struct __mm_store1_pd_struct { @@ -577,26 +577,26 @@ _mm_store1_pd(double *__dp, __m128d __a) ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a) { *(__m128d *)__dp = __a; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a) { __builtin_ia32_storeupd(__dp, __a); } -static 
__inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a) { __a = __builtin_shufflevector(__a, __a, 1, 0); *(__m128d *)__dp = __a; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { @@ -605,7 +605,7 @@ _mm_storeh_pd(double *__dp, __m128d __a) ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { @@ -614,211 +614,211 @@ _mm_storel_pd(double *__dp, __m128d __a) ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a + (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a + (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a + (__v4si)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { return __a + __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b) { return __a + __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); } -static 
__inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b) { return 
(__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a * (__v8hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b) { return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b) { return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a - (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a - (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a - (__v4si)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { return __a - __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b) { return __a - __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b) { return 
(__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b) { return __a & __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b) { return ~__a & __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b) { return __a | __b; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b) { return __a ^ __b; @@ -847,61 +847,61 @@ _mm_xor_si128(__m128i __a, __m128i __b) #define _mm_bslli_si128(a, imm) \ _mm_slli_si128((a), (imm)) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count) { return 
(__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count) { return __builtin_ia32_psllqi128(__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psllq128(__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); @@ -930,61 +930,61 @@ _mm_sra_epi32(__m128i __a, __m128i __count) #define _mm_bsrli_si128(a, imm) \ _mm_srli_si128((a), (imm)) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); } -static 
__inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count) { return __builtin_ia32_psrlqi128(__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psrlq128(__a, __count); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a == (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a == (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a == (__v4si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b) { /* This function always performs a signed comparison, but __v16qi is a char @@ -993,90 +993,90 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b) return (__m128i)((__v16qs)__a > (__v16qs)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a > (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a > (__v4si)__b); } 
-static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b) { return _mm_cmpgt_epi8(__b, __a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b) { return _mm_cmpgt_epi16(__b, __a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b) { return _mm_cmpgt_epi32(__b, __a); } #ifdef __x86_64__ -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, long long __b) { __a[0] = __b; return __a; } -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { return __builtin_ia32_cvtsd2si64(__a); } -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { return __a[0]; } #endif -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { return __builtin_ia32_cvtdq2ps((__v4si)__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvtps2dq(__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvttps2dq(__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { return (__m128i)(__v4si){ __a, 0, 0, 0 }; } #ifdef __x86_64__ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { return (__m128i){ __a, 0 }; } #endif -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { __v4si __b = (__v4si)__a; @@ 
-1084,20 +1084,20 @@ _mm_cvtsi128_si32(__m128i __a) } #ifdef __x86_64__ -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { return __a[0]; } #endif -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p) { return *__p; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i const *__p) { struct __loadu_si128 { @@ -1106,7 +1106,7 @@ _mm_loadu_si128(__m128i const *__p) return ((struct __loadu_si128*)__p)->__v; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i const *__p) { struct __mm_loadl_epi64_struct { @@ -1115,115 +1115,115 @@ _mm_loadl_epi64(__m128i const *__p) return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long q1, long long q0) { return (__m128i){ q0, q1 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 q1, __m64 q0) { return (__m128i){ (long long)q0, (long long)q1 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int i3, int i2, int i1, int i0) { return (__m128i)(__v4si){ i0, i1, i2, i3}; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0) { return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) { return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, 
b8, b9, b10, b11, b12, b13, b14, b15 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { return (__m128i){ __q, __q }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { return (__m128i){ (long long)__q, (long long)__q }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { return (__m128i)(__v4si){ __i, __i, __i, __i }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 q0, __m64 q1) { return (__m128i){ (long long)q0, (long long)q1 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int i0, int i1, int i2, int i3) { return (__m128i)(__v4si){ i0, i1, i2, i3}; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7) { return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) { return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i 
__DEFAULT_FN_ATTRS _mm_setzero_si128(void) { return (__m128i){ 0LL, 0LL }; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b) { *__p = __b; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i *__p, __m128i __b) { __builtin_ia32_storedqu((char *)__p, (__v16qi)__b); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) { __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i *__p, __m128i __a) { struct __mm_storel_epi64_struct { @@ -1232,76 +1232,76 @@ _mm_storel_epi64(__m128i *__p, __m128i __a) ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a) { __builtin_ia32_movntpd(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a) { __builtin_ia32_movntdq(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si32(int *__p, int __a) { __builtin_ia32_movnti(__p, __a); } #ifdef __x86_64__ -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si64(long long *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); } #endif -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_clflush(void const *__p) { __builtin_ia32_clflush(__p); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_lfence(void) { __builtin_ia32_lfence(); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_mfence(void) { __builtin_ia32_mfence(); } -static 
__inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_extract_epi16(__m128i __a, int __imm) { __v8hi __b = (__v8hi)__a; return (unsigned short)__b[__imm & 7]; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_epi16(__m128i __a, int __b, int __imm) { __v8hi __c = (__v8hi)__a; @@ -1309,7 +1309,7 @@ _mm_insert_epi16(__m128i __a, int __b, int __imm) return (__m128i)__c; } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); @@ -1337,85 +1337,85 @@ _mm_movemask_epi8(__m128i __a) 4 + (((imm) & 0x30) >> 4), \ 4 + (((imm) & 0xc0) >> 6)); }) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, 
__m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { return (__m64)__a[0]; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { return (__m128i){ (long long)__a, 0 }; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector(__a, __b, 1, 2+1); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS 
_mm_unpacklo_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector(__a, __b, 0, 2+0); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd(__a); @@ -1425,49 +1425,49 @@ _mm_movemask_pd(__m128d __a) __builtin_shufflevector((__m128d)(a), (__m128d)(b), \ (i) & 1, (((i) & 2) >> 1) + 2); }) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { return (__m128)__a; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { return (__m128i)__a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { return (__m128d)__a; } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { return (__m128i)__a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { return (__m128)__a; } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { return (__m128d)__a; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_pause(void) { __asm__ volatile ("pause"); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) diff --git a/lib/Headers/f16cintrin.h b/lib/Headers/f16cintrin.h index c56960cf7b08..9349b78fe84c 100644 --- a/lib/Headers/f16cintrin.h +++ b/lib/Headers/f16cintrin.h @@ -32,7 +32,7 @@ typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef float __m256 __attribute__ ((__vector_size__ (32))); /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) #define _mm_cvtps_ph(a, imm) __extension__ ({ \ __m128 __a = (a); \ @@ -42,18 +42,18 @@ typedef float __m256 __attribute__ ((__vector_size__ (32))); __m256 __a = (a); \ (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); }) -static __inline __m128 DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS _mm_cvtph_ps(__m128i __a) { return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); } -static __inline __m256 DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtph_ps(__m128i __a) { return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __F16CINTRIN_H */ diff --git a/lib/Headers/fma4intrin.h b/lib/Headers/fma4intrin.h index 5268805605cc..f1178877b252 100644 --- a/lib/Headers/fma4intrin.h +++ b/lib/Headers/fma4intrin.h @@ -31,200 +31,200 @@ #include <pmmintrin.h> /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); } -static __inline__ 
__m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); } 
-static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return 
(__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __FMA4INTRIN_H */ diff --git a/lib/Headers/fmaintrin.h b/lib/Headers/fmaintrin.h index 6f5d1b907a38..114a14380ea0 100644 --- a/lib/Headers/fmaintrin.h +++ b/lib/Headers/fmaintrin.h @@ -29,200 +29,200 @@ #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); } -static __inline__ 
__m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, 
__C); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmaddsub_ps(__m256 __A, __m256 __B, 
__m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __FMAINTRIN_H */ diff --git a/lib/Headers/fxsrintrin.h b/lib/Headers/fxsrintrin.h new file mode 100644 index 000000000000..2b3549c057a1 --- /dev/null +++ b/lib/Headers/fxsrintrin.h @@ -0,0 +1,55 @@ +/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __FXSRINTRIN_H +#define __FXSRINTRIN_H + +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ void __DEFAULT_FN_ATTRS +_fxsave(void *__p) { + return __builtin_ia32_fxsave(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxsave64(void *__p) { + return __builtin_ia32_fxsave64(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxrstor(void *__p) { + return __builtin_ia32_fxrstor(__p); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_fxrstor64(void *__p) { + return __builtin_ia32_fxrstor64(__p); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 4af407dfc7e8..604bc8ca5b39 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -56,6 +56,8 @@ #include <avx512bwintrin.h> +#include <avx512cdintrin.h> + #include <avx512dqintrin.h> #include <avx512vlbwintrin.h> @@ -140,6 +142,8 @@ _writegsbase_u64(unsigned long long __V) #include <shaintrin.h> +#include <fxsrintrin.h> + /* Some intrinsics inside adxintrin.h are available only on processors with ADX, * whereas others are also available at all times. 
*/ #include <adxintrin.h> diff --git a/lib/Headers/inttypes.h b/lib/Headers/inttypes.h new file mode 100644 index 000000000000..3d59d141deb2 --- /dev/null +++ b/lib/Headers/inttypes.h @@ -0,0 +1,102 @@ +/*===---- inttypes.h - Standard header for integer printf macros ----------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_INTTYPES_H +#define __CLANG_INTTYPES_H + +#include_next <inttypes.h> + +#if defined(_MSC_VER) && _MSC_VER < 1900 +/* MSVC headers define int32_t as int, but PRIx32 as "lx" instead of "x". + * This triggers format warnings, so fix it up here. 
*/ +#undef PRId32 +#undef PRIdLEAST32 +#undef PRIdFAST32 +#undef PRIi32 +#undef PRIiLEAST32 +#undef PRIiFAST32 +#undef PRIo32 +#undef PRIoLEAST32 +#undef PRIoFAST32 +#undef PRIu32 +#undef PRIuLEAST32 +#undef PRIuFAST32 +#undef PRIx32 +#undef PRIxLEAST32 +#undef PRIxFAST32 +#undef PRIX32 +#undef PRIXLEAST32 +#undef PRIXFAST32 + +#undef SCNd32 +#undef SCNdLEAST32 +#undef SCNdFAST32 +#undef SCNi32 +#undef SCNiLEAST32 +#undef SCNiFAST32 +#undef SCNo32 +#undef SCNoLEAST32 +#undef SCNoFAST32 +#undef SCNu32 +#undef SCNuLEAST32 +#undef SCNuFAST32 +#undef SCNx32 +#undef SCNxLEAST32 +#undef SCNxFAST32 + +#define PRId32 "d" +#define PRIdLEAST32 "d" +#define PRIdFAST32 "d" +#define PRIi32 "i" +#define PRIiLEAST32 "i" +#define PRIiFAST32 "i" +#define PRIo32 "o" +#define PRIoLEAST32 "o" +#define PRIoFAST32 "o" +#define PRIu32 "u" +#define PRIuLEAST32 "u" +#define PRIuFAST32 "u" +#define PRIx32 "x" +#define PRIxLEAST32 "x" +#define PRIxFAST32 "x" +#define PRIX32 "X" +#define PRIXLEAST32 "X" +#define PRIXFAST32 "X" + +#define SCNd32 "d" +#define SCNdLEAST32 "d" +#define SCNdFAST32 "d" +#define SCNi32 "i" +#define SCNiLEAST32 "i" +#define SCNiFAST32 "i" +#define SCNo32 "o" +#define SCNoLEAST32 "o" +#define SCNoFAST32 "o" +#define SCNu32 "u" +#define SCNuLEAST32 "u" +#define SCNuFAST32 "u" +#define SCNx32 "x" +#define SCNxLEAST32 "x" +#define SCNxFAST32 "x" +#endif + +#endif /* __CLANG_INTTYPES_H */ diff --git a/lib/Headers/lzcntintrin.h b/lib/Headers/lzcntintrin.h index 41e61e9fdefa..4c00e42ac3a9 100644 --- a/lib/Headers/lzcntintrin.h +++ b/lib/Headers/lzcntintrin.h @@ -29,40 +29,40 @@ #define __LZCNTINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) -static __inline__ unsigned short DEFAULT_FN_ATTRS +static __inline__ unsigned short __DEFAULT_FN_ATTRS __lzcnt16(unsigned short __X) { return __X ? __builtin_clzs(__X) : 16; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __lzcnt32(unsigned int __X) { return __X ? __builtin_clz(__X) : 32; } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _lzcnt_u32(unsigned int __X) { return __X ? __builtin_clz(__X) : 32; } #ifdef __x86_64__ -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __lzcnt64(unsigned long long __X) { return __X ? __builtin_clzll(__X) : 64; } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _lzcnt_u64(unsigned long long __X) { return __X ? __builtin_clzll(__X) : 64; } #endif -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __LZCNTINTRIN_H */ diff --git a/lib/Headers/mm3dnow.h b/lib/Headers/mm3dnow.h index 70734e49682e..3218df827177 100644 --- a/lib/Headers/mm3dnow.h +++ b/lib/Headers/mm3dnow.h @@ -30,138 +30,138 @@ typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _m_femms() { __builtin_ia32_femms(); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pavgusb(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2id(__m64 __m) { return (__m64)__builtin_ia32_pf2id((__v2sf)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfadd(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpeq(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpge(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpgt(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmax(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmin(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); } -static 
__inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmul(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcp(__m64 __m) { return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcpit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcpit2(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrt(__m64 __m) { return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrtit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsub(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsubr(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fd(__m64 __m) { return (__m64)__builtin_ia32_pi2fd((__v2si)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pmulhrw(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); } -static __inline__ __m64 
DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfpnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fw(__m64 __m) { return (__m64)__builtin_ia32_pi2fw((__v2si)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsf(__m64 __m) { return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsi(__m64 __m) { return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h index d1d729f26a38..484b499dd7fc 100644 --- a/lib/Headers/mmintrin.h +++ b/lib/Headers/mmintrin.h @@ -31,369 +31,369 @@ typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_empty(void) { __builtin_ia32_emms(); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i) { return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m) { return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i) { return (__m64)__i; } -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m) { return (long long)__m; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 
__DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 
__m2) { return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 
} -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllq(__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllqi(__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count) { return 
(__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrlq(__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlqi(__m, __count); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pand(__m1, __m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pandn(__m1, __m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_por(__m1, __m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pxor(__m1, __m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { return 
(__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void) { return (__m64){ 0LL }; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0) { return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { @@ -401,44 +401,44 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, __b4, __b5, __b6, __b7); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i) { return _mm_set_pi32(__i, __i); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w) { return _mm_set_pi16(__w, __w, __w, __w); } -static 
__inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b) { return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1) { return _mm_set_pi32(__i1, __i0); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { return _mm_set_pi16(__w3, __w2, __w1, __w0); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS /* Aliases for compatibility. */ #define _m_empty _mm_empty diff --git a/lib/Headers/module.modulemap b/lib/Headers/module.modulemap index 8fcb5bc1b3d2..0fc70a8c9b06 100644 --- a/lib/Headers/module.modulemap +++ b/lib/Headers/module.modulemap @@ -32,142 +32,117 @@ module _Builtin_intrinsics [system] [extern_c] { } explicit module cpuid { - requires x86 header "cpuid.h" } explicit module mmx { - requires mmx header "mmintrin.h" } explicit module f16c { - requires f16c header "f16cintrin.h" } explicit module sse { - requires sse export mmx export sse2 // note: for hackish <emmintrin.h> dependency header "xmmintrin.h" } explicit module sse2 { - requires sse2 export sse header "emmintrin.h" } explicit module sse3 { - requires sse3 export sse2 header "pmmintrin.h" } explicit module ssse3 { - requires ssse3 export sse3 header "tmmintrin.h" } explicit module sse4_1 { - requires sse41 export ssse3 header "smmintrin.h" } explicit module sse4_2 { - requires sse42 export sse4_1 header "nmmintrin.h" } explicit module sse4a { - requires sse4a export sse3 header "ammintrin.h" } explicit module avx { - requires avx export sse4_2 header "avxintrin.h" } explicit 
module avx2 { - requires avx2 export avx header "avx2intrin.h" } explicit module avx512f { - requires avx512f export avx2 header "avx512fintrin.h" } explicit module avx512er { - requires avx512er header "avx512erintrin.h" } explicit module bmi { - requires bmi header "bmiintrin.h" } explicit module bmi2 { - requires bmi2 header "bmi2intrin.h" } explicit module fma { - requires fma header "fmaintrin.h" } explicit module fma4 { - requires fma4 export sse3 header "fma4intrin.h" } explicit module lzcnt { - requires lzcnt header "lzcntintrin.h" } explicit module popcnt { - requires popcnt header "popcntintrin.h" } explicit module mm3dnow { - requires mm3dnow header "mm3dnow.h" } explicit module xop { - requires xop export fma4 header "xopintrin.h" } explicit module aes_pclmul { - requires aes, pclmul header "wmmintrin.h" export aes export pclmul } explicit module aes { - requires aes header "__wmmintrin_aes.h" } explicit module pclmul { - requires pclmul header "__wmmintrin_pclmul.h" } } diff --git a/lib/Headers/pmmintrin.h b/lib/Headers/pmmintrin.h index 6e61539fb2ca..e888b6fb7067 100644 --- a/lib/Headers/pmmintrin.h +++ b/lib/Headers/pmmintrin.h @@ -27,57 +27,57 @@ #include <emmintrin.h> /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i const *__p) { return (__m128i)__builtin_ia32_lddqu((char const *)__p); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b) { return __builtin_ia32_haddps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_hsubps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a) { return __builtin_shufflevector(__a, __a, 1, 1, 3, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a) { return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd(__a, __b); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd(__a, __b); @@ -85,7 +85,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b) #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a) { return 
__builtin_shufflevector(__a, __a, 0, 0); @@ -99,18 +99,18 @@ _mm_movedup_pd(__m128d __a) #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) { __builtin_ia32_monitor((void *)__p, __extensions, __hints); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints) { __builtin_ia32_mwait(__extensions, __hints); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __PMMINTRIN_H */ diff --git a/lib/Headers/popcntintrin.h b/lib/Headers/popcntintrin.h index fede8da2bdf5..29c074b61d1c 100644 --- a/lib/Headers/popcntintrin.h +++ b/lib/Headers/popcntintrin.h @@ -25,22 +25,22 @@ #define _POPCNTINTRIN_H /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_popcnt_u32(unsigned int __A) { return __builtin_popcount(__A); } #ifdef __x86_64__ -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); } #endif /* __x86_64__ */ -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* _POPCNTINTRIN_H */ diff --git a/lib/Headers/rdseedintrin.h b/lib/Headers/rdseedintrin.h index ac9ec4f9ba92..421f4ea48702 100644 --- a/lib/Headers/rdseedintrin.h +++ b/lib/Headers/rdseedintrin.h @@ -29,28 +29,28 @@ #define __RDSEEDINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed"))) -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _rdseed16_step(unsigned short *__p) { return __builtin_ia32_rdseed16_step(__p); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _rdseed32_step(unsigned int *__p) { return __builtin_ia32_rdseed32_step(__p); } #ifdef __x86_64__ -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _rdseed64_step(unsigned long long *__p) { return __builtin_ia32_rdseed64_step(__p); } #endif -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __RDSEEDINTRIN_H */ diff --git a/lib/Headers/rtmintrin.h b/lib/Headers/rtmintrin.h index 8709a125a7be..e6a58d743bc9 100644 --- a/lib/Headers/rtmintrin.h +++ b/lib/Headers/rtmintrin.h @@ -38,15 +38,15 @@ #define _XABORT_CODE(x) (((x) >> 24) & 0xFF) /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _xbegin(void) { return __builtin_ia32_xbegin(); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _xend(void) { __builtin_ia32_xend(); @@ -54,6 +54,6 @@ _xend(void) #define _xabort(imm) __builtin_ia32_xabort((imm)) -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __RTMINTRIN_H */ diff --git a/lib/Headers/shaintrin.h b/lib/Headers/shaintrin.h index 4b7429162783..8602d0249d48 100644 --- a/lib/Headers/shaintrin.h +++ b/lib/Headers/shaintrin.h @@ -29,47 +29,47 @@ #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) #define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \ __builtin_ia32_sha1rnds4((V1), (V2), (M)); }) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __SHAINTRIN_H */ diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h index f2cc909fe773..e197590a38e2 100644 --- a/lib/Headers/smmintrin.h +++ b/lib/Headers/smmintrin.h @@ -27,7 +27,7 @@ #include <tmmintrin.h> /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 @@ -91,21 +91,21 @@ (((M) & 0x04) ? 6 : 2), \ (((M) & 0x08) ? 7 : 3)); }) -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) { return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, (__v2df)__M); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) { return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) { return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, @@ -126,13 +126,13 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) (((M) & 0x80) ? 15 : 7)); }) /* SSE4 Dword Multiply Instructions. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32 (__m128i __V1, __m128i __V2) { return (__m128i) ((__v4si)__V1 * (__v4si)__V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); @@ -150,56 +150,56 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); }) /* SSE4 Streaming Load Hint Instruction. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_stream_load_si128 (__m128i *__V) { return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); } /* SSE4 Packed Integer Min/Max Instructions. 
*/ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32 (__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); @@ -253,19 +253,19 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. 
*/ -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); @@ -276,88 +276,88 @@ _mm_testnzc_si128(__m128i __M, __m128i __V) #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) /* SSE4 64-bit Packed Integer Comparisons. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 == (__v2di)__V2); } /* SSE4 Packed Integer Sign-Extension. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_cvtepi32_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V); } /* SSE4 Packed Integer Zero-Extension. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); } /* SSE4 Pack with Unsigned Saturation. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2) { return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); @@ -369,7 +369,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) __m128i __Y = (Y); \ (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); }) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); @@ -380,8 +380,8 @@ _mm_minpos_epu16(__m128i __V) /* These definitions are normally in nmmintrin.h, but gcc puts them in here so we'll do the same. 
*/ -#undef DEFAULT_FN_ATTRS -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +#undef __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) /* These specify the type of data that we're comparing. */ #define _SIDD_UBYTE_OPS 0x00 @@ -442,40 +442,40 @@ _mm_minpos_epu16(__m128i __V) __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) /* SSE4.2 Compare Packed Data -- Greater Than. */ -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 > (__v2di)__V2); } /* SSE4.2 Accumulate CRC32. */ -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u8(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u16(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u32(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); } #ifdef __x86_64__ -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_crc32_u64(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #ifdef __POPCNT__ #include <popcntintrin.h> diff --git a/lib/Headers/tbmintrin.h b/lib/Headers/tbmintrin.h index 1926df9f033c..62f613f9ee5c 100644 --- a/lib/Headers/tbmintrin.h +++ b/lib/Headers/tbmintrin.h @@ -29,59 +29,59 @@ #define __TBMINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) #define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b))) -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcfill_u32(unsigned int a) { return a & (a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blci_u32(unsigned int a) { return a | ~(a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcic_u32(unsigned int a) { return ~a & (a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcmsk_u32(unsigned int a) { return a ^ (a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcs_u32(unsigned int a) { return a | (a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsfill_u32(unsigned int a) { return a | (a - 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsic_u32(unsigned int a) { return ~a | (a - 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __t1mskc_u32(unsigned int a) { return ~a | (a + 1); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS __tzmsk_u32(unsigned int a) { return ~a & (a - 1); @@ -90,61 +90,61 @@ __tzmsk_u32(unsigned int a) #ifdef __x86_64__ #define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcfill_u64(unsigned long long a) { return a & (a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS 
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blci_u64(unsigned long long a) { return a | ~(a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcic_u64(unsigned long long a) { return ~a & (a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcmsk_u64(unsigned long long a) { return a ^ (a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcs_u64(unsigned long long a) { return a | (a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsfill_u64(unsigned long long a) { return a | (a - 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsic_u64(unsigned long long a) { return ~a | (a - 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __t1mskc_u64(unsigned long long a) { return ~a | (a + 1); } -static __inline__ unsigned long long DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __tzmsk_u64(unsigned long long a) { return ~a & (a - 1); } #endif -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __TBMINTRIN_H */ diff --git a/lib/Headers/tmmintrin.h b/lib/Headers/tmmintrin.h index 2474c9414fd3..120d73c34f57 100644 --- a/lib/Headers/tmmintrin.h +++ b/lib/Headers/tmmintrin.h @@ -27,39 +27,39 @@ #include <pmmintrin.h> /* Define the default attributes for the functions in this file. 
*/ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi8(__m64 __a) { return (__m64)__builtin_ia32_pabsb((__v8qi)__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi16(__m64 __a) { return (__m64)__builtin_ia32_pabsw((__v4hi)__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi32(__m64 __a) { return (__m64)__builtin_ia32_pabsd((__v2si)__a); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); @@ -75,150 +75,150 @@ _mm_abs_epi32(__m128i __a) __m64 __b = (b); \ (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); }) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ 
__m64 __DEFAULT_FN_ATTRS _mm_hadd_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadds_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 
__DEFAULT_FN_ATTRS _mm_maddubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhrs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_shuffle_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS 
_mm_sign_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __TMMINTRIN_H */ diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 0085eb4237c1..18aa8c1426a3 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -37,184 +37,184 @@ typedef float __m128 __attribute__((__vector_size__(16))); #endif /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"))) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b) { __a[0] += __b[0]; return __a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b) { return __a + __b; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b) { __a[0] -= __b[0]; return __a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b) { return __a - __b; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b) { __a[0] *= __b[0]; return __a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b) { return __a * __b; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b) { __a[0] /= __b[0]; return __a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b) { return __a / __b; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS 
_mm_sqrt_ss(__m128 __a) { __m128 __c = __builtin_ia32_sqrtss(__a); return (__m128) { __c[0], __a[1], __a[2], __a[3] }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps(__a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { __m128 __c = __builtin_ia32_rcpss(__a); return (__m128) { __c[0], __a[1], __a[2], __a[3] }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { return __builtin_ia32_rcpps(__a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { __m128 __c = __builtin_ia32_rsqrtss(__a); return (__m128) { __c[0], __a[1], __a[2], __a[3] }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a) { return __builtin_ia32_rsqrtps(__a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { return __builtin_ia32_minss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b) { return __builtin_ia32_minps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { return __builtin_ia32_maxss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b) { return __builtin_ia32_maxps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b) { return (__m128)((__v4si)__a & (__v4si)__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b) { return (__m128)(~(__v4si)__a & (__v4si)__b); 
} -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b) { return (__m128)((__v4si)__a | (__v4si)__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b) { return (__m128)((__v4si)__a ^ (__v4si)__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpless(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, @@ -222,13 +222,13 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps(__b, __a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b) { return 
(__m128)__builtin_shufflevector(__a, @@ -236,49 +236,49 @@ _mm_cmpge_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps(__b, __a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnless(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, @@ -286,13 +286,13 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps(__b, __a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, @@ -300,115 +300,115 @@ 
_mm_cmpnge_ss(__m128 __a, __m128 __b) 4, 1, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps(__b, __a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordps(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordss(__a, __b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordps(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comieq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comilt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comile(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comigt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comige(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comineq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static 
__inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomieq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomilt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomile(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomigt(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomige(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomineq(__a, __b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a) { return __builtin_ia32_cvtss2si(__a); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvt_ss2si(__m128 __a) { return _mm_cvtss_si32(__a); @@ -416,7 +416,7 @@ _mm_cvt_ss2si(__m128 __a) #ifdef __x86_64__ -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtss_si64(__m128 __a) { return __builtin_ia32_cvtss2si64(__a); @@ -424,56 +424,56 @@ _mm_cvtss_si64(__m128 __a) #endif -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvtps2pi(__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvt_ps2pi(__m128 __a) { return _mm_cvtps_pi32(__a); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a) { return __a[0]; } -static __inline__ int DEFAULT_FN_ATTRS 
+static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtt_ss2si(__m128 __a) { return _mm_cvttss_si32(__a); } -static __inline__ long long DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_si64(__m128 __a) { return __a[0]; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvttps2pi(__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtt_ps2pi(__m128 __a) { return _mm_cvttps_pi32(__a); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b) { __a[0] = __b; return __a; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_si2ss(__m128 __a, int __b) { return _mm_cvtsi32_ss(__a, __b); @@ -481,7 +481,7 @@ _mm_cvt_si2ss(__m128 __a, int __b) #ifdef __x86_64__ -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi64_ss(__m128 __a, long long __b) { __a[0] = __b; @@ -490,25 +490,25 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) #endif -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpi32_ps(__m128 __a, __m64 __b) { return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); } -static __inline__ float DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtss_f32(__m128 __a) { return __a[0]; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadh_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); @@ -520,7 +520,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p) return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); } -static __inline__ __m128 DEFAULT_FN_ATTRS 
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadl_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); @@ -532,7 +532,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p) return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ss(const float *__p) { struct __mm_load_ss_struct { @@ -542,7 +542,7 @@ _mm_load_ss(const float *__p) return (__m128){ __u, 0, 0, 0 }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load1_ps(const float *__p) { struct __mm_load1_ps_struct { @@ -554,13 +554,13 @@ _mm_load1_ps(const float *__p) #define _mm_load_ps1(p) _mm_load1_ps(p) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p) { return *(__m128*)__p; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p) { struct __loadu_ps { @@ -569,63 +569,63 @@ _mm_loadu_ps(const float *__p) return ((struct __loadu_ps*)__p)->__v; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadr_ps(const float *__p) { __m128 __a = _mm_load_ps(__p); return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { return (__m128){ __w, 0, 0, 0 }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w) { return (__m128){ __w, __w, __w, __w }; } /* Microsoft specific. 
*/ -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps1(float __w) { return _mm_set1_ps(__w); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w) { return (__m128){ __w, __x, __y, __z }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w) { return (__m128){ __z, __y, __x, __w }; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void) { return (__m128){ 0, 0, 0, 0 }; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { __builtin_ia32_storehps((__v2si *)__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { __builtin_ia32_storelps((__v2si *)__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ss(float *__p, __m128 __a) { struct __mm_store_ss_struct { @@ -634,32 +634,32 @@ _mm_store_ss(float *__p, __m128 __a) ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a) { __builtin_ia32_storeups(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); _mm_storeu_ps(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a) { return _mm_store1_ps(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps(float *__p, __m128 __a) { *(__m128 *)__p = __a; } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ 
void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); @@ -678,32 +678,32 @@ _mm_storer_ps(float *__p, __m128 __a) #define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) #endif -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pi(__m64 *__p, __m64 __a) { __builtin_ia32_movntq(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a) { __builtin_ia32_movntps(__p, __a); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_sfence(void) { __builtin_ia32_sfence(); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_extract_pi16(__m64 __a, int __n) { __v4hi __b = (__v4hi)__a; return (unsigned short)__b[__n & 3]; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_insert_pi16(__m64 __a, int __d, int __n) { __v4hi __b = (__v4hi)__a; @@ -711,37 +711,37 @@ _mm_insert_pi16(__m64 __a, int __d, int __n) return (__m64)__b; } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_max_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_max_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_min_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_min_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pi8(__m64 __a) { 
return __builtin_ia32_pmovmskb((__v8qi)__a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); @@ -751,37 +751,37 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) __m64 __a = (a); \ (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) { __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_avg_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_avg_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sad_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); } -static __inline__ unsigned int DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_getcsr(void) { return __builtin_ia32_stmxcsr(); } -static __inline__ void DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS _mm_setcsr(unsigned int __i) { __builtin_ia32_ldmxcsr(__i); @@ -795,37 +795,37 @@ _mm_setcsr(unsigned int __i) (((mask) & 0x30) >> 4) + 4, \ (((mask) & 0xc0) >> 6) + 4); }) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS 
_mm_move_ss(__m128 __a, __m128 __b) { return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpi16_ps(__m64 __a) { __m64 __b, __c; @@ -843,7 +843,7 @@ _mm_cvtpi16_ps(__m64 __a) return __r; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpu16_ps(__m64 __a) { __m64 __b, __c; @@ -860,7 +860,7 @@ _mm_cvtpu16_ps(__m64 __a) return __r; } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpi8_ps(__m64 __a) { __m64 __b; @@ -872,7 +872,7 @@ _mm_cvtpi8_ps(__m64 __a) return _mm_cvtpi16_ps(__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpu8_ps(__m64 __a) { __m64 __b; @@ -883,7 +883,7 @@ _mm_cvtpu8_ps(__m64 __a) return _mm_cvtpi16_ps(__b); } -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { __m128 __c; @@ -895,7 +895,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) return _mm_cvtpi32_ps(__c, __a); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtps_pi16(__m128 __a) { __m64 __b, __c; @@ -907,7 +907,7 @@ _mm_cvtps_pi16(__m128 __a) return _mm_packs_pi32(__b, __c); } -static __inline__ __m64 DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtps_pi8(__m128 __a) { __m64 __b, __c; @@ -918,7 +918,7 @@ _mm_cvtps_pi8(__m128 __a) return _mm_packs_pi16(__b, __c); } -static __inline__ int DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS 
_mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps(__a); @@ -992,7 +992,7 @@ do { \ #define _m_ _mm_ #define _m_ _mm_ -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS /* Ugly hack for backwards-compatibility (compatible with gcc) */ #if defined(__SSE2__) && !__has_feature(modules) diff --git a/lib/Headers/xopintrin.h b/lib/Headers/xopintrin.h index 8417d7826962..86188bb29fd7 100644 --- a/lib/Headers/xopintrin.h +++ b/lib/Headers/xopintrin.h @@ -31,207 +31,207 @@ #include <fma4intrin.h> /* Define the default attributes for the functions in this file. */ -#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"))) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi8(__m128i __A) 
{ return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu32(__m128i __A) { return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubd_epi16(__m128i __A) { return 
(__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); } -static __inline__ __m256i DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); @@ -253,49 +253,49 @@ _mm_rot_epi64(__m128i __A, __m128i __B) __m128i __A = (A); \ (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); } 
-static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); @@ -350,385 +350,385 @@ _mm_sha_epi64(__m128i __A, __m128i __B) #define _MM_PCOMCTRL_FALSE 6 #define _MM_PCOMCTRL_TRUE 7 -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu8(__m128i __A, 
__m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i 
__DEFAULT_FN_ATTRS _mm_comneq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); } -static 
__inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi8(__m128i __A, __m128i __B) { return 
_mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_comneq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); } -static __inline__ __m128i 
DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); } -static __inline__ __m128i DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); @@ -762,42 +762,42 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B) (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ (__v8si)__C, (I)); }) -static __inline__ __m128 DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) { return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_sd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); } -static __inline__ __m128 DEFAULT_FN_ATTRS 
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ps(__m128 __A) { return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); } -static __inline__ __m128d DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_pd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } -static __inline__ __m256 DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } -static __inline__ __m256d DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } -#undef DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* __XOPINTRIN_H */ |