diff options
Diffstat (limited to 'test/CodeGenCUDA/launch-bounds.cu')
-rw-r--r-- | test/CodeGenCUDA/launch-bounds.cu | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/test/CodeGenCUDA/launch-bounds.cu b/test/CodeGenCUDA/launch-bounds.cu new file mode 100644 index 000000000000..ed4c2bfc8870 --- /dev/null +++ b/test/CodeGenCUDA/launch-bounds.cu @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s + +#include "Inputs/cuda.h" + +#define MAX_THREADS_PER_BLOCK 256 +#define MIN_BLOCKS_PER_MP 2 + +// Test both max threads per block and Min cta per sm. +extern "C" { +__global__ void +__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP ) +Kernel1() +{ +} +} + +// CHECK: !{{[0-9]+}} = metadata !{void ()* @Kernel1, metadata !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = metadata !{void ()* @Kernel1, metadata !"minctasm", i32 2} + +// Test only max threads per block. Min cta per sm defaults to 0, and +// CodeGen doesn't output a zero value for minctasm. +extern "C" { +__global__ void +__launch_bounds__( MAX_THREADS_PER_BLOCK ) +Kernel2() +{ +} +} + +// CHECK: !{{[0-9]+}} = metadata !{void ()* @Kernel2, metadata !"maxntidx", i32 256} |