1 files changed, 59 insertions, 34 deletions
diff --git a/lib/Headers/__clang_cuda_device_functions.h b/lib/Headers/__clang_cuda_device_functions.h
index 67bbc68b1637..50ad674f9483 100644
--- a/lib/Headers/__clang_cuda_device_functions.h
+++ b/lib/Headers/__clang_cuda_device_functions.h
@@ -1,22 +1,8 @@
 /*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===-----------------------------------------------------------------------===
  */
@@ -24,15 +10,21 @@
 #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 
+#ifndef _OPENMP
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
 #endif
+#endif
 
 // __DEVICE__ is a helper macro with common set of attributes for the wrappers
 // we implement in this file. We need static in order to avoid emitting unused
 // functions and __forceinline__ helps inlining these wrappers at -O1.
 #pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#define __DEVICE__ static __attribute__((always_inline))
+#else
 #define __DEVICE__ static __device__ __forceinline__
+#endif
 
 // libdevice provides fast low precision and slow full-recision implementations
 // for some functions. Which one gets selected depends on
@@ -45,6 +37,15 @@
 #define __FAST_OR_SLOW(fast, slow) slow
 #endif
 
+// For C++ 17 we need to include noexcept attribute to be compatible
+// with the header-defined version. This may be removed once
+// variant is supported.
+#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
+#define __NOEXCEPT noexcept
+#else
+#define __NOEXCEPT
+#endif
+
 __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
 __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
 __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
@@ -52,8 +53,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
 __DEVICE__ unsigned long long __brevll(unsigned long long __a) {
   return __nv_brevll(__a);
 }
+#if defined(__cplusplus)
 __DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
 __DEVICE__ void __brkpt(int __a) { __brkpt(); }
+#else
+__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
+__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
+#endif
 __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
                                     unsigned int __c) {
   return __nv_byte_perm(__a, __b, __c);
@@ -237,6 +243,9 @@ __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+#ifdef _MSC_VER
+__DEVICE__ int __finitel(long double __a);
+#endif
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -445,8 +454,14 @@ __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
 __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
 __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+#ifdef _MSC_VER
+__DEVICE__ int __isinfl(long double __a);
+#endif
 __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
 __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+#ifdef _MSC_VER
+__DEVICE__ int __isnanl(long double __a);
+#endif
 __DEVICE__ double __ll2double_rd(long long __a) {
   return __nv_ll2double_rd(__a);
 }
@@ -520,8 +535,8 @@ __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
 __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
 __DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
 __DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
-__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {
-  return __nv_fast_sincosf(__a, __sptr, __cptr);
+__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
+  return __nv_fast_sincosf(__a, __s, __c);
 }
 __DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
 __DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
@@ -1468,7 +1483,8 @@ __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
   return r;
 }
 #endif // CUDA_VERSION >= 9020
-__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); }
+__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); }
 __DEVICE__ double acos(double __a) { return __nv_acos(__a); }
 __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
 __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
@@ -1487,8 +1503,10 @@ __DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }
 __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
 __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
 __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
+#ifndef _OPENMP
 __DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }
 __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
+#endif
 __DEVICE__ double copysign(double __a, double __b) {
   return __nv_copysign(__a, __b);
 }
@@ -1525,7 +1543,6 @@ __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }
 __DEVICE__ float expf(float __a) { return __nv_expf(__a); }
 __DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }
 __DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }
-__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
 __DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }
 __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
 __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
@@ -1563,16 +1580,16 @@ __DEVICE__ double j1(double __a) { return __nv_j1(__a); }
 __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
 __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
 __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
-#if defined(__LP64__)
-__DEVICE__ long labs(long __a) { return llabs(__a); };
+#if defined(__LP64__) || defined(_WIN64)
+__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); };
 #else
-__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
+__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); };
 #endif
 __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
 __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
 __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
 __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
-__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
+__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); }
 __DEVICE__ long long llmax(long long __a, long long __b) {
   return __nv_llmax(__a, __b);
 }
@@ -1597,7 +1614,7 @@ __DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }
 __DEVICE__ float logf(float __a) {
   return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
 }
-#if defined(__LP64__)
+#if defined(__LP64__) || defined(_WIN64)
 __DEVICE__ long lrint(double __a) { return llrint(__a); }
 __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
 __DEVICE__ long lround(double __a) { return llround(__a); }
@@ -1609,12 +1626,16 @@ __DEVICE__ long lround(double __a) { return round(__a); }
 __DEVICE__ long lroundf(float __a) { return roundf(__a); }
 #endif
 __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
+// These functions shouldn't be declared when including this header
+// for math function resolution purposes.
+#ifndef _OPENMP
 __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
   return __builtin_memcpy(__a, __b, __c);
 }
 __DEVICE__ void *memset(void *__a, int __b, size_t __c) {
   return __builtin_memset(__a, __b, __c);
 }
+#endif
 __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
 __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
 __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
@@ -1698,6 +1719,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
 __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
 __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
 __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
+// TODO: remove once variant is supported
+#ifndef _OPENMP
 __DEVICE__ double scalbln(double __a, long __b) {
   if (__b > INT_MAX)
     return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -1712,18 +1735,19 @@ __DEVICE__ float scalblnf(float __a, long __b) {
     return __a > 0 ? 0.f : -0.f;
   return scalbnf(__a, (int)__b);
 }
+#endif
 __DEVICE__ double sin(double __a) { return __nv_sin(__a); }
-__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {
-  return __nv_sincos(__a, __sptr, __cptr);
+__DEVICE__ void sincos(double __a, double *__s, double *__c) {
+  return __nv_sincos(__a, __s, __c);
 }
-__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {
-  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);
+__DEVICE__ void sincosf(float __a, float *__s, float *__c) {
+  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
 }
-__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {
-  return __nv_sincospi(__a, __sptr, __cptr);
+__DEVICE__ void sincospi(double __a, double *__s, double *__c) {
+  return __nv_sincospi(__a, __s, __c);
 }
-__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {
-  return __nv_sincospif(__a, __sptr, __cptr);
+__DEVICE__ void sincospif(float __a, float *__s, float *__c) {
+  return __nv_sincospif(__a, __s, __c);
 }
 __DEVICE__ float sinf(float __a) {
   return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);
@@ -1763,6 +1787,7 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
+#undef __NOEXCEPT
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__FAST_OR_SLOW")
 #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__