summaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp34
1 files changed, 33 insertions, 1 deletions
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index f56b238f91e66..6a3dc6799c43f 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
+ // Div with a constant which is a power of 2 will be converted by
+ // DAGCombiner to use shifts. With vector shift-element instructions, a
+ // vector sdiv costs about as much as a scalar one.
+ const unsigned SDivCostEstimate = 4;
+ bool SDivPow2 = false;
+ bool UDivPow2 = false;
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
+ Args.size() == 2) {
+ const ConstantInt *CI = nullptr;
+ if (const Constant *C = dyn_cast<Constant>(Args[1])) {
+ if (C->getType()->isVectorTy())
+ CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ else
+ CI = dyn_cast<const ConstantInt>(C);
+ }
+ if (CI != nullptr &&
+ (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
+ if (Opcode == Instruction::SDiv)
+ SDivPow2 = true;
+ else
+ UDivPow2 = true;
+ }
+ }
+
if (Ty->isVectorTy()) {
assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
unsigned VF = Ty->getVectorNumElements();
@@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr) {
+ Opcode == Instruction::AShr || UDivPow2) {
return NumVectors;
}
+ if (SDivPow2)
+ return (NumVectors * SDivCostEstimate);
+
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
// FP128, the scalar cost is 1, and there is no overhead since the values
@@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// 2 * ipm sequences ; xor ; shift ; compare
return 7;
+ if (UDivPow2)
+ return 1;
+ if (SDivPow2)
+ return SDivCostEstimate;
+
// An extra extension for narrow types is needed.
if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
// sext of op(s) for narrow types