Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/immintrin.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/immintrin.h | 747
1 file changed, 747 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/immintrin.h b/contrib/llvm-project/clang/lib/Headers/immintrin.h
new file mode 100644
index 000000000000..cd6cf09b90ca
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Headers/immintrin.h
@@ -0,0 +1,747 @@
+/*===---- immintrin.h - Intel intrinsics -----------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#define __IMMINTRIN_H
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
+#include <x86gprintrin.h>
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
+#include <mmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
+#include <xmmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
+#include <emmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
+#include <pmmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
+#include <tmmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__SSE4_2__) || defined(__SSE4_1__))
+#include <smmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AES__) || defined(__PCLMUL__))
+#include <wmmintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
+#include <clflushoptintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
+#include <clwbintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
+#include <avxintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
+#include <avx2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
+#include <f16cintrin.h>
+#endif
+
+/* No feature check desired due to internal checks */
+#include <bmiintrin.h>
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
+#include <bmi2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
+#include <lzcntintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
+#include <popcntintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
+#include <fmaintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
+#include <avx512fintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
+#include <avx512vlintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
+#include <avx512bwintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
+#include <avx512bitalgintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
+#include <avx512cdintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
+#include <avx512vpopcntdqintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
+#include <avx512vpopcntdqvlintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
+#include <avx512vnniintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
+#include <avx512vlvnniintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
+#include <avxvnniintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
+#include <avx512dqintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
+#include <avx512vlbitalgintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512BW__))
+#include <avx512vlbwintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512CD__))
+#include <avx512vlcdintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512DQ__))
+#include <avx512vldqintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
+#include <avx512ifmaintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
+#include <avx512ifmavlintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
+#include <avxifmaintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
+#include <avx512vbmiintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
+#include <avx512vbmivlintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
+#include <avx512vbmi2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
+#include <avx512vlvbmi2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
+#include <avx512fp16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512FP16__))
+#include <avx512vlfp16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
+#include <avx512bf16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512BF16__))
+#include <avx512vlbf16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
+#include <pkuintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
+#include <vpclmulqdqintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
+#include <vaesintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
+#include <gfniintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
+#include <avxvnniint8intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
+#include <avxneconvertintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
+#include <sha512intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
+#include <sm3intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
+#include <sm4intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
+#include <avxvnniint16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
+/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDPID </c> instruction.
+///
+/// \returns The 32-bit contents of the MSR.
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
+_rdpid_u32(void) {
+  return __builtin_ia32_rdpid();
+}
+#endif // __RDPID__
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
+/// Returns a 16-bit hardware-generated random value.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
+///
+/// \param __p
+///    A pointer to a 16-bit memory location to place the random value.
+/// \returns 1 if the value was successfully generated, 0 otherwise.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand16_step(unsigned short *__p)
+{
+  return (int)__builtin_ia32_rdrand16_step(__p);
+}
+
+/// Returns a 32-bit hardware-generated random value.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
+///
+/// \param __p
+///    A pointer to a 32-bit memory location to place the random value.
+/// \returns 1 if the value was successfully generated, 0 otherwise.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand32_step(unsigned int *__p)
+{
+  return (int)__builtin_ia32_rdrand32_step(__p);
+}
+
+/// Returns a 64-bit hardware-generated random value.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
+///
+/// \param __p
+///    A pointer to a 64-bit memory location to place the random value.
+/// \returns 1 if the value was successfully generated, 0 otherwise.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+#ifdef __x86_64__
+  return (int)__builtin_ia32_rdrand64_step(__p);
+#else
+  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+  // rdrand instructions.
+  unsigned int __lo, __hi;
+  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
+  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
+  if (__res_lo && __res_hi) {
+    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
+    return 1;
+  } else {
+    *__p = 0;
+    return 0;
+  }
+#endif
+}
+#endif /* __RDRND__ */
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
+#ifdef __x86_64__
+/// Reads the FS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
+///
+/// \returns The lower 32 bits of the FS base register.
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_readfsbase_u32(void)
+{
+  return __builtin_ia32_rdfsbase32();
+}
+
+/// Reads the FS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
+///
+/// \returns The contents of the FS base register.
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_readfsbase_u64(void)
+{
+  return __builtin_ia32_rdfsbase64();
+}
+
+/// Reads the GS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
+///
+/// \returns The lower 32 bits of the GS base register.
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_readgsbase_u32(void)
+{
+  return __builtin_ia32_rdgsbase32();
+}
+
+/// Reads the GS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
+///
+/// \returns The contents of the GS base register.
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_readgsbase_u64(void)
+{
+  return __builtin_ia32_rdgsbase64();
+}
+
+/// Modifies the FS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
+///
+/// \param __V
+///    Value to use for the lower 32 bits of the FS base register.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_writefsbase_u32(unsigned int __V)
+{
+  __builtin_ia32_wrfsbase32(__V);
+}
+
+/// Modifies the FS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
+///
+/// \param __V
+///    Value to use for the FS base register.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_writefsbase_u64(unsigned long long __V)
+{
+  __builtin_ia32_wrfsbase64(__V);
+}
+
+/// Modifies the GS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
+///
+/// \param __V
+///    Value to use for the lower 32 bits of the GS base register.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_writegsbase_u32(unsigned int __V)
+{
+  __builtin_ia32_wrgsbase32(__V);
+}
+
+/// Modifies the GS base register.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
+///
+/// \param __V
+///    Value to use for GS base register.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
+_writegsbase_u64(unsigned long long __V)
+{
+  __builtin_ia32_wrgsbase64(__V);
+}
+
+#endif
+#endif /* __FSGSBASE__ */
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
+
+/* The structs used below are to force the load/store to be unaligned. This
+ * is accomplished with the __packed__ attribute. The __may_alias__ prevents
+ * tbaa metadata from being generated based on the struct and the type of the
+ * field inside of it.
+ */
+
+/// Load a 16-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the 16-bit value to load.
+/// \returns The byte-swapped value.
+static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i16(void const * __P) {
+  struct __loadu_i16 {
+    unsigned short __v;
+  } __attribute__((__packed__, __may_alias__));
+  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
+}
+
+/// Swap the bytes of a 16-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the memory for storing the swapped value.
+/// \param __D
+///    The 16-bit value to be byte-swapped.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i16(void * __P, short __D) {
+  struct __storeu_i16 {
+    unsigned short __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
+}
+
+/// Load a 32-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the 32-bit value to load.
+/// \returns The byte-swapped value.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i32(void const * __P) {
+  struct __loadu_i32 {
+    unsigned int __v;
+  } __attribute__((__packed__, __may_alias__));
+  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
+}
+
+/// Swap the bytes of a 32-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the memory for storing the swapped value.
+/// \param __D
+///    The 32-bit value to be byte-swapped.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i32(void * __P, int __D) {
+  struct __storeu_i32 {
+    unsigned int __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
+}
+
+#ifdef __x86_64__
+/// Load a 64-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the 64-bit value to load.
+/// \returns The byte-swapped value.
+static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_loadbe_i64(void const * __P) {
+  struct __loadu_i64 {
+    unsigned long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
+}
+
+/// Swap the bytes of a 64-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+///    A pointer to the memory for storing the swapped value.
+/// \param __D
+///    The 64-bit value to be byte-swapped.
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
+_storebe_i64(void * __P, long long __D) {
+  struct __storeu_i64 {
+    unsigned long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
+}
+#endif
+#endif /* __MOVBE */
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
+#include <rtmintrin.h>
+#include <xtestintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
+#include <shaintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
+#include <fxsrintrin.h>
+#endif
+
+/* No feature check desired due to internal MSC_VER checks */
+#include <xsaveintrin.h>
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
+#include <xsaveoptintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
+#include <xsavecintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
+#include <xsavesintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
+#include <cetintrin.h>
+#endif
+
+/* Intrinsics inside adcintrin.h are available at all times. */
+#include <adcintrin.h>
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
+#include <adxintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
+#include <rdseedintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
+#include <wbnoinvdintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
+#include <cldemoteintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
+#include <waitpkgintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
+    defined(__MOVDIR64B__)
+#include <movdirintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
+#include <pconfigintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
+#include <sgxintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
+#include <ptwriteintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
+#include <invpcidintrin.h>
+#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
+#include <amxfp16intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
+    defined(__WIDEKL__)
+#include <keylockerintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
+    defined(__AMX_INT8__) || defined(__AMX_BF16__)
+#include <amxintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
+#include <amxcomplexintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    defined(__AVX512VP2INTERSECT__)
+#include <avx512vp2intersectintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
+#include <avx512vlvp2intersectintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
+#include <enqcmdintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
+#include <serializeintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
+#include <tsxldtrkintrin.h>
+#endif
+
+#if defined(_MSC_VER) && __has_extension(gnu_asm)
+/* Define the default attributes for these intrinsics */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Exchange HLE
+\*----------------------------------------------------------------------------*/
+#if defined(__i386__) || defined(__x86_64__)
+static __inline__ long __DEFAULT_FN_ATTRS
+_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
+  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
+                       : "+r" (_Value), "+m" (*_Target) :: "memory");
+  return _Value;
+}
+static __inline__ long __DEFAULT_FN_ATTRS
+_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
+  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
+                       : "+r" (_Value), "+m" (*_Target) :: "memory");
+  return _Value;
+}
+#endif
+#if defined(__x86_64__)
+static __inline__ __int64 __DEFAULT_FN_ATTRS
+_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
+  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
+                       : "+r" (_Value), "+m" (*_Target) :: "memory");
+  return _Value;
+}
+static __inline__ __int64 __DEFAULT_FN_ATTRS
+_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
+  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
+                       : "+r" (_Value), "+m" (*_Target) :: "memory");
+  return _Value;
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Compare Exchange HLE
+\*----------------------------------------------------------------------------*/
+#if defined(__i386__) || defined(__x86_64__)
+static __inline__ long __DEFAULT_FN_ATTRS
+_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
+                              long _Exchange, long _Comparand) {
+  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
+                       : "+a" (_Comparand), "+m" (*_Destination)
+                       : "r" (_Exchange) : "memory");
+  return _Comparand;
+}
+static __inline__ long __DEFAULT_FN_ATTRS
+_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
+                              long _Exchange, long _Comparand) {
+  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
+                       : "+a" (_Comparand), "+m" (*_Destination)
+                       : "r" (_Exchange) : "memory");
+  return _Comparand;
+}
+#endif
+#if defined(__x86_64__)
+static __inline__ __int64 __DEFAULT_FN_ATTRS
+_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
+                              __int64 _Exchange, __int64 _Comparand) {
+  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
+                       : "+a" (_Comparand), "+m" (*_Destination)
+                       : "r" (_Exchange) : "memory");
+  return _Comparand;
+}
+static __inline__ __int64 __DEFAULT_FN_ATTRS
+_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
+                              __int64 _Exchange, __int64 _Comparand) {
+  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
+                       : "+a" (_Comparand), "+m" (*_Destination)
+                       : "r" (_Exchange) : "memory");
+  return _Comparand;
+}
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
+
+#endif /* __IMMINTRIN_H */
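
Usage notes (not part of the diff): the header above defines a few intrinsics inline rather than pulling in a sub-header, and short sketches of how they are typically called may help readers of this import. First, the RDPID intrinsic. This is a minimal sketch, assuming RDPID-capable hardware and a build flag such as cc -mrdpid; the main() wrapper and the bit split are illustrative, and the (node << 12) | cpu layout is the Linux convention for IA32_TSC_AUX, not something the header guarantees.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* Requires RDPID support in the CPU; build with e.g. cc -mrdpid. */
  unsigned int aux = _rdpid_u32();
  /* Assumes the Linux convention of (numa_node << 12) | cpu in IA32_TSC_AUX. */
  printf("cpu=%u node=%u (raw=%#x)\n", aux & 0xfff, aux >> 12, aux);
  return 0;
}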
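
For the RDRAND intrinsics (_rdrand16_step, _rdrand32_step, _rdrand64_step), a hedged sketch follows; the retry loop and the get_random_u64 helper name are assumptions for illustration, reflecting the documented behavior that the instruction can transiently return no data (the int result is 0). Build with e.g. cc -mrdrnd on RDRAND-capable hardware.

#include <immintrin.h>
#include <stdio.h>

/* Retry a few times: RDRAND may transiently fail and return 0. */
static int get_random_u64(unsigned long long *out) {
  for (int i = 0; i < 10; ++i) {
    if (_rdrand64_step(out))  /* returns 1 on success */
      return 1;
  }
  return 0;
}

int main(void) {
  unsigned long long r;
  if (get_random_u64(&r))
    printf("rdrand: 0x%llx\n", r);
  else
    printf("rdrand did not return data\n");
  return 0;
}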
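
The FSGSBASE intrinsics are only meaningful where the operating system has enabled user-space FSGSBASE (CR4.FSGSBASE, e.g. Linux 5.9 or newer); otherwise the instruction faults. The following is a call-shape sketch only, with the main() wrapper and the cc -mfsgsbase flag assumed.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
#ifdef __x86_64__
  /* Reads the FS base (used for TLS on x86-64). Raises SIGILL if the
   * kernel has not enabled user-space FSGSBASE. */
  unsigned long long fsbase = _readfsbase_u64();
  printf("FS base = %#llx\n", fsbase);
#endif
  return 0;
}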
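
For the MOVBE load/store helpers, a sketch of reading a big-endian 32-bit field out of a byte buffer; the buffer contents and the cc -mmovbe flag are assumptions, and the printed value assumes a little-endian host.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* A big-endian 32-bit length field, e.g. from a network header. */
  unsigned char buf[4] = {0x00, 0x00, 0x01, 0x02};

  /* _loadbe_i32 performs an unaligned load plus byte swap (MOVBE). */
  int len = _loadbe_i32(buf);
  printf("length = %d\n", len);  /* prints 258 on a little-endian host */

  /* _storebe_i32 writes the value back in big-endian byte order. */
  _storebe_i32(buf, len + 1);
  return 0;
}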
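
Finally, the _Interlocked*_HLE* definitions at the end of the file exist only when clang runs in MSVC-compatible mode (_MSC_VER defined and the GNU-asm extension available, i.e. clang-cl). A minimal spinlock sketch under that assumption; the lock_word variable and the function names are illustrative, not part of the header. The 0xf2/0xf3 prefixes emitted by these intrinsics are the XACQUIRE/XRELEASE hints, so the lock can be elided on HLE-capable parts and degrades to an ordinary locked exchange elsewhere.

/* Compile with clang-cl, where immintrin.h provides the HLE intrinsics. */
#include <immintrin.h>

static volatile long lock_word;

void spin_lock_hle(void) {
  /* XACQUIRE-prefixed lock xchg: begins a (possibly elided) critical section. */
  while (_InterlockedExchange_HLEAcquire(&lock_word, 1) != 0)
    ;
}

void spin_unlock_hle(void) {
  /* XRELEASE-prefixed lock xchg: ends the elided region. */
  _InterlockedExchange_HLERelease(&lock_word, 0);
}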