summaryrefslogtreecommitdiff
path: root/lib/Headers/cuda_builtin_vars.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Headers/cuda_builtin_vars.h')
-rw-r--r--lib/Headers/cuda_builtin_vars.h44
1 files changed, 30 insertions, 14 deletions
diff --git a/lib/Headers/cuda_builtin_vars.h b/lib/Headers/cuda_builtin_vars.h
index 901356b3d5ce1..6f5eb9c78d852 100644
--- a/lib/Headers/cuda_builtin_vars.h
+++ b/lib/Headers/cuda_builtin_vars.h
@@ -24,16 +24,20 @@
#ifndef __CUDA_BUILTIN_VARS_H
#define __CUDA_BUILTIN_VARS_H
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
// The file implements built-in CUDA variables using __declspec(property).
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
// All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
// value.
//
// Example:
// int x = threadIdx.x;
// IR output:
-// %0 = call i32 @llvm.ptx.read.tid.x() #3
+// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
// PTX output:
// mov.u32 %r2, %tid.x;
@@ -60,33 +64,45 @@
__attribute__((device)) TypeName *operator&() const __DELETE
struct __cuda_builtin_threadIdx_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
+ // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
struct __cuda_builtin_blockIdx_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
+ // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
struct __cuda_builtin_blockDim_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
+ // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
struct __cuda_builtin_gridDim_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
+ // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};