summaryrefslogtreecommitdiff
path: root/test/CodeGenCUDA/fp-contract.cu
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGenCUDA/fp-contract.cu')
-rw-r--r--test/CodeGenCUDA/fp-contract.cu32
1 files changed, 32 insertions, 0 deletions
diff --git a/test/CodeGenCUDA/fp-contract.cu b/test/CodeGenCUDA/fp-contract.cu
new file mode 100644
index 0000000000000..070ebaea44eed
--- /dev/null
+++ b/test/CodeGenCUDA/fp-contract.cu
@@ -0,0 +1,32 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// By default we should fuse multiply/add into fma instruction.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
+
+// Explicit -ffp-contract=fast
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN: -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN: | FileCheck -check-prefix ENABLED %s
+
+// Explicit -ffp-contract=on -- fusing by front-end (disabled).
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN: -ffp-contract=on -disable-llvm-passes -o - %s \
+// RUN: | FileCheck -check-prefix DISABLED %s
+
+// Explicit -ffp-contract=off should disable instruction fusing.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN: -ffp-contract=off -disable-llvm-passes -o - %s \
+// RUN: | FileCheck -check-prefix DISABLED %s
+
+
+#include "Inputs/cuda.h"
+
+__host__ __device__ float func(float a, float b, float c) { return a + b * c; }
+// ENABLED: fma.rn.f32
+// ENABLED-NEXT: st.param.f32
+
+// DISABLED: mul.rn.f32
+// DISABLED-NEXT: add.rn.f32
+// DISABLED-NEXT: st.param.f32