summaryrefslogtreecommitdiff
path: root/lib/Headers/smmintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Headers/smmintrin.h')
-rw-r--r--lib/Headers/smmintrin.h345
1 files changed, 162 insertions, 183 deletions
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index c2fa5a452bcec..4806b3e4e150d 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -21,13 +21,13 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef _SMMINTRIN_H
-#define _SMMINTRIN_H
+#ifndef __SMMINTRIN_H
+#define __SMMINTRIN_H
#include <tmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128)))
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@@ -46,7 +46,7 @@
#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)
-/// \brief Rounds up each element of the 128-bit vector of [4 x float] to an
+/// Rounds up each element of the 128-bit vector of [4 x float] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [4 x float].
///
@@ -63,7 +63,7 @@
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)
-/// \brief Rounds up each element of the 128-bit vector of [2 x double] to an
+/// Rounds up each element of the 128-bit vector of [2 x double] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [2 x double].
///
@@ -80,7 +80,7 @@
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)
-/// \brief Copies three upper elements of the first 128-bit vector operand to
+/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds up the lowest element of the second 128-bit vector
/// operand to an integer and copies it to the lowest element of the 128-bit
@@ -105,7 +105,7 @@
/// values.
#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
-/// \brief Copies the upper element of the first 128-bit vector operand to the
+/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds up the lower element of the second 128-bit vector operand to an
/// integer and copies it to the lower element of the 128-bit result vector
@@ -130,7 +130,7 @@
/// values.
#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
-/// \brief Rounds down each element of the 128-bit vector of [4 x float] to an
+/// Rounds down each element of the 128-bit vector of [4 x float] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [4 x float].
///
@@ -147,7 +147,7 @@
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)
-/// \brief Rounds down each element of the 128-bit vector of [2 x double] to an
+/// Rounds down each element of the 128-bit vector of [2 x double] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [2 x double].
///
@@ -164,7 +164,7 @@
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)
-/// \brief Copies three upper elements of the first 128-bit vector operand to
+/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds down the lowest element of the second 128-bit vector
/// operand to an integer and copies it to the lowest element of the 128-bit
@@ -189,7 +189,7 @@
/// values.
#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
-/// \brief Copies the upper element of the first 128-bit vector operand to the
+/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds down the lower element of the second 128-bit vector operand to an
/// integer and copies it to the lower element of the 128-bit result vector
@@ -214,7 +214,7 @@
/// values.
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
-/// \brief Rounds each element of the 128-bit vector of [4 x float] to an
+/// Rounds each element of the 128-bit vector of [4 x float] to an
/// integer value according to the rounding control specified by the second
/// argument and returns the rounded values in a 128-bit vector of
/// [4 x float].
@@ -244,10 +244,10 @@
/// 10: Upward (toward positive infinity) \n
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
-#define _mm_round_ps(X, M) __extension__ ({ \
- (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
+#define _mm_round_ps(X, M) \
+ ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
-/// \brief Copies three upper elements of the first 128-bit vector operand to
+/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds the lowest element of the second 128-bit vector
/// operand to an integer value according to the rounding control specified
@@ -285,11 +285,11 @@
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
/// values.
-#define _mm_round_ss(X, Y, M) __extension__ ({ \
- (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M)); })
+#define _mm_round_ss(X, Y, M) \
+ ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
-/// \brief Rounds each element of the 128-bit vector of [2 x double] to an
+/// Rounds each element of the 128-bit vector of [2 x double] to an
/// integer value according to the rounding control specified by the second
/// argument and returns the rounded values in a 128-bit vector of
/// [2 x double].
@@ -319,10 +319,10 @@
/// 10: Upward (toward positive infinity) \n
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
-#define _mm_round_pd(X, M) __extension__ ({ \
- (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
+#define _mm_round_pd(X, M) \
+ ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)))
-/// \brief Copies the upper element of the first 128-bit vector operand to the
+/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds the lower element of the second 128-bit vector operand to an
/// integer value according to the rounding control specified by the third
@@ -360,12 +360,12 @@
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
/// values.
-#define _mm_round_sd(X, Y, M) __extension__ ({ \
- (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M)); })
+#define _mm_round_sd(X, Y, M) \
+ ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Packed Blending Intrinsics. */
-/// \brief Returns a 128-bit vector of [2 x double] where the values are
+/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -389,13 +389,11 @@
/// When a mask bit is 1, the corresponding 64-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
-#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
- (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
- (__v2df)(__m128d)(V2), \
- (((M) & 0x01) ? 2 : 0), \
- (((M) & 0x02) ? 3 : 1)); })
+#define _mm_blend_pd(V1, V2, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M)))
-/// \brief Returns a 128-bit vector of [4 x float] where the values are selected
+/// Returns a 128-bit vector of [4 x float] where the values are selected
/// from either the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -419,14 +417,11 @@
/// When a mask bit is 1, the corresponding 32-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
-#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
- (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
- (((M) & 0x01) ? 4 : 0), \
- (((M) & 0x02) ? 5 : 1), \
- (((M) & 0x04) ? 6 : 2), \
- (((M) & 0x08) ? 7 : 3)); })
-
-/// \brief Returns a 128-bit vector of [2 x double] where the values are
+#define _mm_blend_ps(V1, V2, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M)))
+
+/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -453,7 +448,7 @@ _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
(__v2df)__M);
}
-/// \brief Returns a 128-bit vector of [4 x float] where the values are
+/// Returns a 128-bit vector of [4 x float] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -480,7 +475,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
(__v4sf)__M);
}
-/// \brief Returns a 128-bit vector of [16 x i8] where the values are selected
+/// Returns a 128-bit vector of [16 x i8] where the values are selected
/// from either of the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -493,7 +488,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
/// \param __V2
/// A 128-bit vector of [16 x i8].
/// \param __M
-/// A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying
+/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
/// how the values are to be copied. The position of the mask bit corresponds
/// to the most significant bit of a copied value. When a mask bit is 0, the
/// corresponding 8-bit element in operand \a __V1 is copied to the same
@@ -507,7 +502,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
(__v16qi)__M);
}
-/// \brief Returns a 128-bit vector of [8 x i16] where the values are selected
+/// Returns a 128-bit vector of [8 x i16] where the values are selected
/// from either of the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -531,20 +526,12 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
/// When a mask bit is 1, the corresponding 16-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
-#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
- (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
- (__v8hi)(__m128i)(V2), \
- (((M) & 0x01) ? 8 : 0), \
- (((M) & 0x02) ? 9 : 1), \
- (((M) & 0x04) ? 10 : 2), \
- (((M) & 0x08) ? 11 : 3), \
- (((M) & 0x10) ? 12 : 4), \
- (((M) & 0x20) ? 13 : 5), \
- (((M) & 0x40) ? 14 : 6), \
- (((M) & 0x80) ? 15 : 7)); })
+#define _mm_blend_epi16(V1, V2, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), (int)(M)))
/* SSE4 Dword Multiply Instructions. */
-/// \brief Multiples corresponding elements of two 128-bit vectors of [4 x i32]
+/// Multiplies corresponding elements of two 128-bit vectors of [4 x i32]
/// and returns the lower 32 bits of each product in a 128-bit vector of
/// [4 x i32].
///
@@ -563,7 +550,7 @@ _mm_mullo_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
}
-/// \brief Multiplies corresponding even-indexed elements of two 128-bit
+/// Multiplies corresponding even-indexed elements of two 128-bit
/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]
/// containing the products.
///
@@ -584,7 +571,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
}
/* SSE4 Floating Point Dot Product Instructions. */
-/// \brief Computes the dot product of the two 128-bit vectors of [4 x float]
+/// Computes the dot product of the two 128-bit vectors of [4 x float]
/// and returns it in the elements of the 128-bit result vector of
/// [4 x float].
///
@@ -616,11 +603,11 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// each [4 x float] subvector. If a bit is set, the dot product is returned
/// in the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
-#define _mm_dp_ps(X, Y, M) __extension__ ({ \
- (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M)); })
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
-/// \brief Computes the dot product of the two 128-bit vectors of [2 x double]
+/// Computes the dot product of the two 128-bit vectors of [2 x double]
/// and returns it in the elements of the 128-bit result vector of
/// [2 x double].
///
@@ -648,15 +635,15 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// input vectors are used as an input for dot product; otherwise that input
/// is treated as zero. Bits [1:0] determine which elements of the result
/// will receive a copy of the final dot product, with bit [0] corresponding
-/// to the lowest element and bit [3] corresponding to the highest element of
+/// to the lowest element and bit [1] corresponding to the highest element of
/// each [2 x double] vector. If a bit is set, the dot product is returned in
/// the corresponding element; otherwise that element is set to zero.
-#define _mm_dp_pd(X, Y, M) __extension__ ({\
- (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M)); })
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Streaming Load Hint Instruction. */
-/// \brief Loads integer values from a 128-bit aligned memory location to a
+/// Loads integer values from a 128-bit aligned memory location to a
/// 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
@@ -675,7 +662,7 @@ _mm_stream_load_si128 (__m128i const *__V)
}
/* SSE4 Packed Integer Min/Max Instructions. */
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser
/// of the two values.
///
@@ -694,7 +681,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the
/// greater value of the two.
///
@@ -713,7 +700,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser
/// value of the two.
///
@@ -732,7 +719,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the
/// greater value of the two.
///
@@ -751,7 +738,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser
/// value of the two.
///
@@ -770,7 +757,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
/// greater value of the two.
///
@@ -789,7 +776,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser
/// value of the two.
///
@@ -808,7 +795,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
}
-/// \brief Compares the corresponding elements of two 128-bit vectors of
+/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the
/// greater value of the two.
///
@@ -828,7 +815,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
}
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
-/// \brief Takes the first argument \a X and inserts an element from the second
+/// Takes the first argument \a X and inserts an element from the second
/// argument \a Y as selected by the third argument \a N. That result then
/// has elements zeroed out also as selected by the third argument \a N. The
/// resulting 128-bit vector of [4 x float] is then returned.
@@ -866,11 +853,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
/// Bits[3:0]: If any of these bits are set, the corresponding result
/// element is cleared.
-/// \returns A 128-bit vector of [4 x float] containing the copied single-
-/// precision floating point elements from the operands.
+/// \returns A 128-bit vector of [4 x float] containing the copied
+/// single-precision floating point elements from the operands.
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
-/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
+/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
/// returns it, using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -893,15 +880,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of parameter \a X are returned.
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
#define _mm_extract_ps(X, N) (__extension__ \
- ({ union { int __i; float __f; } __t; \
- __v4sf __a = (__v4sf)(__m128)(X); \
- __t.__f = __a[(N) & 3]; \
- __t.__i;}))
+ ({ union { int __i; float __f; } __t; \
+ __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
+ __t.__i;}))
/* Miscellaneous insert and extract macros. */
/* Extract a single-precision float from X at index N into D. */
-#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
- (D) = __a[N]; }))
+#define _MM_EXTRACT_FLOAT(D, X, N) \
+ do { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } while (0)
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
an index suitable for _mm_insert_ps. */
@@ -912,7 +898,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
_MM_MK_INSERTPS_NDX((N), 0, 0x0e))
/* Insert int into packed integer array at index. */
-/// \brief Constructs a 128-bit vector of [16 x i8] by first making a copy of
+/// Constructs a 128-bit vector of [16 x i8] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the lower 8 bits
/// of an integer parameter \a I into an offset specified by the immediate
/// value parameter \a N.
@@ -952,12 +938,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1110: Bits [119:112] of the result are used for insertion. \n
/// 1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi8(X, I, N) (__extension__ \
- ({ __v16qi __a = (__v16qi)(__m128i)(X); \
- __a[(N) & 15] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi8(X, I, N) \
+ ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
+ (int)(I), (int)(N)))
-/// \brief Constructs a 128-bit vector of [4 x i32] by first making a copy of
+/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 32-bit
/// integer parameter \a I at the offset specified by the immediate value
/// parameter \a N.
@@ -985,13 +970,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 10: Bits [95:64] of the result are used for insertion. \n
/// 11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi32(X, I, N) (__extension__ \
- ({ __v4si __a = (__v4si)(__m128i)(X); \
- __a[(N) & 3] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi32(X, I, N) \
+ ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
+ (int)(I), (int)(N)))
#ifdef __x86_64__
-/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of
+/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 64-bit
/// integer parameter \a I, using the immediate value parameter \a N as an
/// insertion location selector.
@@ -1017,16 +1001,15 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 0: Bits [63:0] of the result are used for insertion. \n
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi64(X, I, N) (__extension__ \
- ({ __v2di __a = (__v2di)(__m128i)(X); \
- __a[(N) & 1] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi64(X, I, N) \
+ ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
+ (long long)(I), (int)(N)))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
* as a zero extended value, so it is unsigned.
*/
-/// \brief Extracts an 8-bit element from the 128-bit integer vector of
+/// Extracts an 8-bit element from the 128-bit integer vector of
/// [16 x i8], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1061,11 +1044,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// \returns An unsigned integer, whose lower 8 bits are selected from the
/// 128-bit integer vector parameter and the remaining bits are assigned
/// zeros.
-#define _mm_extract_epi8(X, N) (__extension__ \
- ({ __v16qi __a = (__v16qi)(__m128i)(X); \
- (int)(unsigned char) __a[(N) & 15];}))
+#define _mm_extract_epi8(X, N) \
+ ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
+ (int)(N)))
-/// \brief Extracts a 32-bit element from the 128-bit integer vector of
+/// Extracts a 32-bit element from the 128-bit integer vector of
/// [4 x i32], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1087,12 +1070,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of the parameter \a X are extracted.
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
-#define _mm_extract_epi32(X, N) (__extension__ \
- ({ __v4si __a = (__v4si)(__m128i)(X); \
- (int)__a[(N) & 3];}))
+#define _mm_extract_epi32(X, N) \
+ ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)))
#ifdef __x86_64__
-/// \brief Extracts a 64-bit element from the 128-bit integer vector of
+/// Extracts a 64-bit element from the 128-bit integer vector of
/// [2 x i64], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1111,13 +1093,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 0: Bits [63:0] are returned. \n
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
-#define _mm_extract_epi64(X, N) (__extension__ \
- ({ __v2di __a = (__v2di)(__m128i)(X); \
- (long long)__a[(N) & 1];}))
+#define _mm_extract_epi64(X, N) \
+ ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)))
#endif /* __x86_64__ */
/* SSE4 128-bit Packed Integer Comparisons. */
-/// \brief Tests whether the specified bits in a 128-bit integer vector are all
+/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// \headerfile <x86intrin.h>
@@ -1135,7 +1116,7 @@ _mm_testz_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
-/// \brief Tests whether the specified bits in a 128-bit integer vector are all
+/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// \headerfile <x86intrin.h>
@@ -1153,7 +1134,7 @@ _mm_testc_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
-/// \brief Tests whether the specified bits in a 128-bit integer vector are
+/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// \headerfile <x86intrin.h>
@@ -1172,7 +1153,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
-/// \brief Tests whether the specified bits in a 128-bit integer vector are all
+/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// \headerfile <x86intrin.h>
@@ -1189,7 +1170,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
/// otherwise.
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
-/// \brief Tests whether the specified bits in a 128-bit integer vector are
+/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// \headerfile <x86intrin.h>
@@ -1208,7 +1189,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
/// FALSE otherwise.
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
-/// \brief Tests whether the specified bits in a 128-bit integer vector are all
+/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// \headerfile <x86intrin.h>
@@ -1227,7 +1208,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
/* SSE4 64-bit Packed Integer Comparisons. */
-/// \brief Compares each of the corresponding 64-bit values of the 128-bit
+/// Compares each of the corresponding 64-bit values of the 128-bit
/// integer vectors for equality.
///
/// \headerfile <x86intrin.h>
@@ -1246,7 +1227,7 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
}
/* SSE4 Packed Integer Sign-Extension. */
-/// \brief Sign-extends each of the lower eight 8-bit integer elements of a
+/// Sign-extends each of the lower eight 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
/// are unused.
@@ -1267,7 +1248,7 @@ _mm_cvtepi8_epi16(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
-/// \brief Sign-extends each of the lower four 8-bit integer elements of a
+/// Sign-extends each of the lower four 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
/// vector are unused.
@@ -1277,8 +1258,8 @@ _mm_cvtepi8_epi16(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign-
-/// extended to 32-bit values.
+/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
+/// sign-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi32(__m128i __V)
@@ -1288,7 +1269,7 @@ _mm_cvtepi8_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
}
-/// \brief Sign-extends each of the lower two 8-bit integer elements of a
+/// Sign-extends each of the lower two 8-bit integer elements of a
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
/// vector are unused.
@@ -1298,8 +1279,8 @@ _mm_cvtepi8_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign-
-/// extended to 64-bit values.
+/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
+/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi64(__m128i __V)
@@ -1309,7 +1290,7 @@ _mm_cvtepi8_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
}
-/// \brief Sign-extends each of the lower four 16-bit integer elements of a
+/// Sign-extends each of the lower four 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
/// vector are unused.
@@ -1319,8 +1300,8 @@ _mm_cvtepi8_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign-
-/// extended to 32-bit values.
+/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
+/// sign-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi32(__m128i __V)
@@ -1328,7 +1309,7 @@ _mm_cvtepi16_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
}
-/// \brief Sign-extends each of the lower two 16-bit integer elements of a
+/// Sign-extends each of the lower two 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper six elements of the input
/// vector are unused.
@@ -1338,8 +1319,8 @@ _mm_cvtepi16_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign-
-/// extended to 64-bit values.
+/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
+/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi64(__m128i __V)
@@ -1347,7 +1328,7 @@ _mm_cvtepi16_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
}
-/// \brief Sign-extends each of the lower two 32-bit integer elements of a
+/// Sign-extends each of the lower two 32-bit integer elements of a
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
/// are unused.
@@ -1357,8 +1338,8 @@ _mm_cvtepi16_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign-
-/// extended to 64-bit values.
+/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
+/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi32_epi64(__m128i __V)
@@ -1367,7 +1348,7 @@ _mm_cvtepi32_epi64(__m128i __V)
}
/* SSE4 Packed Integer Zero-Extension. */
-/// \brief Zero-extends each of the lower eight 8-bit integer elements of a
+/// Zero-extends each of the lower eight 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
/// are unused.
@@ -1377,8 +1358,8 @@ _mm_cvtepi32_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero-
-/// extended to 16-bit values.
+/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
+/// zero-extended to 16-bit values.
/// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi16(__m128i __V)
@@ -1386,7 +1367,7 @@ _mm_cvtepu8_epi16(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
-/// \brief Zero-extends each of the lower four 8-bit integer elements of a
+/// Zero-extends each of the lower four 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
/// vector are unused.
@@ -1396,8 +1377,8 @@ _mm_cvtepu8_epi16(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero-
-/// extended to 32-bit values.
+/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
+/// zero-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi32(__m128i __V)
@@ -1405,7 +1386,7 @@ _mm_cvtepu8_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
}
-/// \brief Zero-extends each of the lower two 8-bit integer elements of a
+/// Zero-extends each of the lower two 8-bit integer elements of a
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
/// vector are unused.
@@ -1415,8 +1396,8 @@ _mm_cvtepu8_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero-
-/// extended to 64-bit values.
+/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
+/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi64(__m128i __V)
@@ -1424,7 +1405,7 @@ _mm_cvtepu8_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
}
-/// \brief Zero-extends each of the lower four 16-bit integer elements of a
+/// Zero-extends each of the lower four 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
/// vector are unused.
@@ -1434,8 +1415,8 @@ _mm_cvtepu8_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero-
-/// extended to 32-bit values.
+/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
+/// zero-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi32(__m128i __V)
@@ -1443,7 +1424,7 @@ _mm_cvtepu16_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
}
-/// \brief Zero-extends each of the lower two 16-bit integer elements of a
+/// Zero-extends each of the lower two 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector
/// are unused.
@@ -1453,8 +1434,8 @@ _mm_cvtepu16_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero-
-/// extended to 64-bit values.
+/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
+/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi64(__m128i __V)
@@ -1462,7 +1443,7 @@ _mm_cvtepu16_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
}
-/// \brief Zero-extends each of the lower two 32-bit integer elements of a
+/// Zero-extends each of the lower two 32-bit integer elements of a
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
/// are unused.
@@ -1472,8 +1453,8 @@ _mm_cvtepu16_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero-
-/// extended to 64-bit values.
+/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
+/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu32_epi64(__m128i __V)
@@ -1482,7 +1463,7 @@ _mm_cvtepu32_epi64(__m128i __V)
}
/* SSE4 Pack with Unsigned Saturation. */
-/// \brief Converts 32-bit signed integers from both 128-bit integer vector
+/// Converts 32-bit signed integers from both 128-bit integer vector
/// operands into 16-bit unsigned integers, and returns the packed result.
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
/// 0x0000 are saturated to 0x0000.
@@ -1511,7 +1492,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
}
/* SSE4 Multiple Packed Sums of Absolute Difference. */
-/// \brief Subtracts 8-bit unsigned integer values and computes the absolute
+/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences. Sums of four consecutive absolute differences
/// are then returned, with the starting offsets into both operands selected
/// by the bit fields in the immediate operand.
@@ -1534,23 +1515,23 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/// \code
/// // M2 represents bit 2 of the immediate operand
/// // M10 represents bits [1:0] of the immediate operand
-/// i = M2 * 4
-/// j = M10 * 4
+/// i = M2 * 4;
+/// j = M10 * 4;
/// for (k = 0; k < 8; k = k + 1) {
-/// d0 = abs(X[i + k + 0] - Y[j + 0])
-/// d1 = abs(X[i + k + 1] - Y[j + 1])
-/// d2 = abs(X[i + k + 2] - Y[j + 2])
-/// d3 = abs(X[i + k + 3] - Y[j + 3])
-/// r[k] = d0 + d1 + d2 + d3
+/// d0 = abs(X[i + k + 0] - Y[j + 0]);
+/// d1 = abs(X[i + k + 1] - Y[j + 1]);
+/// d2 = abs(X[i + k + 2] - Y[j + 2]);
+/// d3 = abs(X[i + k + 3] - Y[j + 3]);
+/// r[k] = d0 + d1 + d2 + d3;
/// }
/// \endcode
/// \returns A 128-bit integer vector containing the sums of the sets of
/// absolute differences between both operands.
-#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
+#define _mm_mpsadbw_epu8(X, Y, M) \
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
- (__v16qi)(__m128i)(Y), (M)); })
+ (__v16qi)(__m128i)(Y), (M))
-/// \brief Finds the minimum unsigned 16-bit element in the input 128-bit
+/// Finds the minimum unsigned 16-bit element in the input 128-bit
/// vector of [8 x u16] and returns it along with its index.
///
/// \headerfile <x86intrin.h>
@@ -1604,7 +1585,7 @@ _mm_minpos_epu16(__m128i __V)
#define _SIDD_UNIT_MASK 0x40
/* SSE4.2 Packed Comparison Intrinsics. */
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns a 128-bit integer vector representing the result
/// mask of the comparison.
@@ -1660,7 +1641,7 @@ _mm_minpos_epu16(__m128i __V)
(__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns an integer representing the result index of the
/// comparison.
@@ -1714,7 +1695,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns a 128-bit integer vector representing the result
/// mask of the comparison.
@@ -1775,7 +1756,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns an integer representing the result index of the
/// comparison.
@@ -1835,7 +1816,7 @@ _mm_minpos_epu16(__m128i __V)
(int)(M))
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
/// string in \a B is the maximum, otherwise, returns 0.
@@ -1885,7 +1866,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
/// 0.
@@ -1934,7 +1915,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
///
@@ -1982,7 +1963,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
/// the maximum, otherwise, returns 0.
@@ -2032,7 +2013,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
/// the maximum, otherwise, returns 0.
@@ -2082,7 +2063,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
/// string in \a B is the maximum, otherwise, returns 0.
@@ -2137,7 +2118,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
/// returns 0.
@@ -2191,7 +2172,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
///
@@ -2244,7 +2225,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
/// the maximum, otherwise, returns 0.
@@ -2299,7 +2280,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
-/// \brief Uses the immediate operand \a M to perform a comparison of string
+/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
/// the maximum, otherwise, returns 0.
@@ -2354,7 +2335,7 @@ _mm_minpos_epu16(__m128i __V)
(int)(M))
/* SSE4.2 Compare Packed Data -- Greater Than. */
-/// \brief Compares each of the corresponding 64-bit values of the 128-bit
+/// Compares each of the corresponding 64-bit values of the 128-bit
/// integer vectors to determine if the values in the first operand are
/// greater than those in the second operand.
///
@@ -2374,7 +2355,7 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
}
/* SSE4.2 Accumulate CRC32. */
-/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned char operand.
///
/// \headerfile <x86intrin.h>
@@ -2394,7 +2375,7 @@ _mm_crc32_u8(unsigned int __C, unsigned char __D)
return __builtin_ia32_crc32qi(__C, __D);
}
-/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned short operand.
///
/// \headerfile <x86intrin.h>
@@ -2414,7 +2395,7 @@ _mm_crc32_u16(unsigned int __C, unsigned short __D)
return __builtin_ia32_crc32hi(__C, __D);
}
-/// \brief Adds the first unsigned integer operand to the CRC-32C checksum of
+/// Adds the first unsigned integer operand to the CRC-32C checksum of
/// the second unsigned integer operand.
///
/// \headerfile <x86intrin.h>
@@ -2435,7 +2416,7 @@ _mm_crc32_u32(unsigned int __C, unsigned int __D)
}
#ifdef __x86_64__
-/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned 64-bit integer operand.
///
/// \headerfile <x86intrin.h>
@@ -2458,8 +2439,6 @@ _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
#undef __DEFAULT_FN_ATTRS
-#ifdef __POPCNT__
#include <popcntintrin.h>
-#endif
-#endif /* _SMMINTRIN_H */
+#endif /* __SMMINTRIN_H */