diff options
Diffstat (limited to 'lib/Headers/cuda_builtin_vars.h')
-rw-r--r-- | lib/Headers/cuda_builtin_vars.h | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/lib/Headers/cuda_builtin_vars.h b/lib/Headers/cuda_builtin_vars.h index 901356b3d5ce1..6f5eb9c78d852 100644 --- a/lib/Headers/cuda_builtin_vars.h +++ b/lib/Headers/cuda_builtin_vars.h @@ -24,16 +24,20 @@ #ifndef __CUDA_BUILTIN_VARS_H #define __CUDA_BUILTIN_VARS_H +// Forward declares from vector_types.h. +struct uint3; +struct dim3; + // The file implements built-in CUDA variables using __declspec(property). // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx // All read accesses of built-in variable fields get converted into calls to a -// getter function which in turn would call appropriate builtin to fetch the +// getter function which in turn calls the appropriate builtin to fetch the // value. // // Example: // int x = threadIdx.x; // IR output: -// %0 = call i32 @llvm.ptx.read.tid.x() #3 +// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 // PTX output: // mov.u32 %r2, %tid.x; @@ -60,33 +64,45 @@ __attribute__((device)) TypeName *operator&() const __DELETE struct __cuda_builtin_threadIdx_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); + // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a + // uint3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); }; struct __cuda_builtin_blockIdx_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); + // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a + // uint3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); }; struct __cuda_builtin_blockDim_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); + // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a + // dim3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator dim3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); }; struct __cuda_builtin_gridDim_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); + // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a + // dim3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator dim3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); }; |