diff options
Diffstat (limited to 'test/Driver/cuda-options.cu')
-rw-r--r-- | test/Driver/cuda-options.cu | 82 |
1 file changed, 72 insertions, 10 deletions
diff --git a/test/Driver/cuda-options.cu b/test/Driver/cuda-options.cu index c4bfda903d9e1..4ffab317d16ac 100644 --- a/test/Driver/cuda-options.cu +++ b/test/Driver/cuda-options.cu @@ -62,7 +62,7 @@ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s -// Verify that --cuda-gpu-arch option passes the correct GPU archtecture to +// Verify that --cuda-gpu-arch option passes the correct GPU architecture to // device compilation. // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ @@ -73,11 +73,10 @@ // and that all results are included on the host side. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \ -// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ -// RUN: -check-prefix DEVICE2 -check-prefix DEVICE-SM30 \ -// RUN: -check-prefix DEVICE2-SM35 -check-prefix HOST \ -// RUN: -check-prefix HOST-NOSAVE -check-prefix INCLUDES-DEVICE \ -// RUN: -check-prefix NOLINK %s +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \ +// RUN: -check-prefixes DEVICE-SM30,DEVICE2-SM35 \ +// RUN: -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \ +// RUN: -check-prefixes HOST,HOST-NOSAVE,NOLINK %s // Verify that device-side results are passed to the correct tool when // -save-temps is used. @@ -93,21 +92,21 @@ // RUN: -check-prefix HOST-AS -check-prefix NOLINK %s // Verify that --[no-]cuda-gpu-arch arguments are handled correctly. 
-// a) --no-cuda-gpu-arch=X negates preceeding --cuda-gpu-arch=X +// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ // RUN: --no-cuda-gpu-arch=sm_35 \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s -// b) --no-cuda-gpu-arch=X negates more than one preceeding --cuda-gpu-arch=X +// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ // RUN: --no-cuda-gpu-arch=sm_35 \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s -// c) if --no-cuda-gpu-arch=X negates all preceeding --cuda-gpu-arch=X +// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X // we default to sm_20 -- same as if no --cuda-gpu-arch were passed. // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ @@ -143,6 +142,48 @@ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCHALLERROR %s + +// Verify that --[no-]cuda-include-ptx arguments are handled correctly. +// a) by default we're including PTX for all GPUs. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s + +// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-include-ptx=all \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s + +// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only. 
+// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-include-ptx=sm_35 \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-include-ptx=sm_30 \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s + +// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s + +// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ +// RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \ +// RUN: -c %s 2>&1 \ +// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s + + // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30" @@ -182,9 +223,15 @@ // DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu" // DEVICE2-SAME: "-fcuda-is-device" // DEVICE2-SM35-SAME: "-target-cpu" "sm_35" -// DEVICE2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" +// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]" // DEVICE2-SAME: "-x" "cuda" +// Match another call to ptxas. +// DEVICE2: ptxas +// DEVICE2-SM35-DAG: "--gpu-name" "sm_35" +// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]" +// DEVICE2-DAG: "[[PTXFILE2]]" + // Match no device-side compilation. 
// NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" // NODEVICE-NOT: "-fcuda-is-device" @@ -193,6 +240,8 @@ // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" +// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]" +// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]" // Match host-side preprocessor job with -save-temps. // HOST-SAVE: "-cc1" "-triple" "x86_64--linux-gnu" @@ -207,7 +256,11 @@ // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" // HOST-NOSAVE-SAME: "-x" "cuda" // HOST-SAVE-SAME: "-x" "cuda-cpp-output" +// There is only one GPU binary after combining it with fatbinary! +// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" +// There is only one GPU binary after combining it with fatbinary. +// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" // Match external assembler that uses compilation output. // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]" @@ -225,3 +278,12 @@ // Match no linker. // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}" + +// FATBIN-COMMON:fatbinary +// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]" +// FATBIN-COMMON: "--image=profile=sm_30,file= +// PTX-SM30: "--image=profile=compute_30,file= +// NOPTX-SM30-NOT: "--image=profile=compute_30,file= +// FATBIN-COMMON: "--image=profile=sm_35,file= +// PTX-SM35: "--image=profile=compute_35,file= +// NOPTX-SM35-NOT: "--image=profile=compute_35,file= |