Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 421 |
1 files changed, 227 insertions, 194 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index acec3c533585..864200e5f71c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,7 +30,8 @@ using namespace llvm;
 //
 //===----------------------------------------------------------------------===//
 
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 
 int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     break;
   }
 
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 
 int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                        const APInt &Imm, Type *Ty) {
+                                        const APInt &Imm, Type *Ty,
+                                        TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
       return TTI::TCC_Free;
     break;
   }
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 
 TargetTransformInfo::PopcntSupportKind
@@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   for (auto &BB : L->blocks())
     for (auto &I : *BB) {
       if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
-        ImmutableCallSite CS(&I);
-        if (const Function *F = CS.getCalledFunction()) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          if (isLoweredToCall(F))
            HasCall = true;
          if (F->getIntrinsicID() == Intrinsic::memcpy ||
@@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+                                     TTI::TCK_RecipThroughput);
       }
     }
 
@@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   UP.Force = true;
 }
 
+void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                           TTI::PeelingPreferences &PP) {
+  BaseT::getPeelingPreferences(L, SE, PP);
+}
 
 bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                                    TargetTransformInfo::LSRCost &C2) {
@@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
   return 0;
 }
 
+unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
+                                              unsigned NumStridedMemAccesses,
+                                              unsigned NumPrefetches,
+                                              bool HasCall) const {
+  // Don't prefetch a loop with many far apart accesses.
+  if (NumPrefetches > 16)
+    return UINT_MAX;
+
+  // Emit prefetch instructions for smaller strides in cases where we think
+  // the hardware prefetcher might not be able to keep up.
+  if (NumStridedMemAccesses > 32 &&
+      NumStridedMemAccesses == NumMemAccesses && !HasCall)
+    return 1;
+
+  return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
+}
+
 bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
   EVT VT = TLI->getValueType(DL, DataType);
   return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
 }
@@ -341,18 +365,25 @@ static unsigned getScalarSizeInBits(Type *Ty) {
 // type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
 // 3.
 static unsigned getNumVectorRegs(Type *Ty) {
-  assert(Ty->isVectorTy() && "Expected vector type");
-  unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+  auto *VTy = cast<FixedVectorType>(Ty);
+  unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();
   assert(WideBits > 0 && "Could not compute size of vector");
   return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
 }
 
 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
 
+  // TODO: Handle more cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                         Op2Info, Opd1PropInfo,
+                                         Opd2PropInfo, Args, CxtI);
+
   // TODO: return a good value for BB-VECTORIZER that includes the
   // immediate loads, which we do not want to count for the loop
   // vectorizer, since they are hopefully hoisted out of the loop. This
@@ -391,10 +422,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     }
   }
 
-  if (Ty->isVectorTy()) {
-    assert(ST->hasVector() &&
-           "getArithmeticInstrCost() called with vector type.");
-    unsigned VF = Ty->getVectorNumElements();
+  if (!Ty->isVectorTy()) {
+    // These FP operations are supported with a dedicated instruction for
+    // float, double and fp128 (base implementation assumes float generally
+    // costs 2).
+    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+      return 1;
+
+    // There is no native support for FRem.
+    if (Opcode == Instruction::FRem)
+      return LIBCALL_COST;
+
+    // Give discount for some combined logical operations if supported.
+    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+      if (Opcode == Instruction::Xor) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() &&
+                (I->getOpcode() == Instruction::And ||
+                 I->getOpcode() == Instruction::Or ||
+                 I->getOpcode() == Instruction::Xor))
+              return 0;
+        }
+      }
+      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+              return 0;
+        }
+      }
+    }
+
+    // Or requires one instruction, although it has custom handling for i64.
+    if (Opcode == Instruction::Or)
+      return 1;
+
+    if (Opcode == Instruction::Xor && ScalarBits == 1) {
+      if (ST->hasLoadStoreOnCond2())
+        return 5; // 2 * (li 0; loc 1); xor
+      return 7; // 2 * ipm sequences ; xor ; shift ; compare
+    }
+
+    if (DivRemConstPow2)
+      return (SignedDivRem ? SDivPow2Cost : 1);
+    if (DivRemConst)
+      return DivMulSeqCost;
+    if (SignedDivRem || UnsignedDivRem)
+      return DivInstrCost;
+  }
+  else if (ST->hasVector()) {
+    auto *VTy = cast<FixedVectorType>(Ty);
+    unsigned VF = VTy->getNumElements();
     unsigned NumVectors = getNumVectorRegs(Ty);
 
     // These vector operations are custom handled, but are still supported
@@ -407,7 +487,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     if (DivRemConstPow2)
       return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
     if (DivRemConst)
-      return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+      return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);
     if ((SignedDivRem || UnsignedDivRem) && VF > 4)
       // Temporary hack: disable high vectorization factors with integer
      // division/remainder, which will get scalarized and handled with
@@ -429,8 +509,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
       unsigned ScalarCost =
-          getArithmeticInstrCost(Opcode, Ty->getScalarType());
-      unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+          getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
+      unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);
       // FIXME: VF 2 for these FP operations are currently just as
       // expensive as for VF 4.
       if (VF == 2)
@@ -447,101 +527,51 @@ int SystemZTTIImpl::getArithmeticInstrCost(
 
     // There is no native support for FRem.
     if (Opcode == Instruction::FRem) {
-      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);
       // FIXME: VF 2 for float is currently just as expensive as for VF 4.
       if (VF == 2 && ScalarBits == 32)
         Cost *= 2;
       return Cost;
     }
   }
-  else { // Scalar:
-    // These FP operations are supported with a dedicated instruction for
-    // float, double and fp128 (base implementation assumes float generally
-    // costs 2).
-    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
-        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
-      return 1;
-
-    // There is no native support for FRem.
-    if (Opcode == Instruction::FRem)
-      return LIBCALL_COST;
-
-    // Give discount for some combined logical operations if supported.
-    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
-      if (Opcode == Instruction::Xor) {
-        for (const Value *A : Args) {
-          if (const Instruction *I = dyn_cast<Instruction>(A))
-            if (I->hasOneUse() &&
-                (I->getOpcode() == Instruction::And ||
-                 I->getOpcode() == Instruction::Or ||
-                 I->getOpcode() == Instruction::Xor))
-              return 0;
-        }
-      }
-      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
-        for (const Value *A : Args) {
-          if (const Instruction *I = dyn_cast<Instruction>(A))
-            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
-              return 0;
-        }
-      }
-    }
-
-    // Or requires one instruction, although it has custom handling for i64.
-    if (Opcode == Instruction::Or)
-      return 1;
-
-    if (Opcode == Instruction::Xor && ScalarBits == 1) {
-      if (ST->hasLoadStoreOnCond2())
-        return 5; // 2 * (li 0; loc 1); xor
-      return 7; // 2 * ipm sequences ; xor ; shift ; compare
-    }
-
-    if (DivRemConstPow2)
-      return (SignedDivRem ? SDivPow2Cost : 1);
-    if (DivRemConst)
-      return DivMulSeqCost;
-    if (SignedDivRem || UnsignedDivRem)
-      return DivInstrCost;
-  }
 
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                   Type *SubTp) {
-  assert (Tp->isVectorTy());
-  assert (ST->hasVector() && "getShuffleCost() called.");
-  unsigned NumVectors = getNumVectorRegs(Tp);
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                                   int Index, VectorType *SubTp) {
+  if (ST->hasVector()) {
+    unsigned NumVectors = getNumVectorRegs(Tp);
 
-  // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+    // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
 
-  // FP128 values are always in scalar registers, so there is no work
-  // involved with a shuffle, except for broadcast. In that case register
-  // moves are done with a single instruction per element.
-  if (Tp->getScalarType()->isFP128Ty())
-    return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+    // FP128 values are always in scalar registers, so there is no work
+    // involved with a shuffle, except for broadcast. In that case register
+    // moves are done with a single instruction per element.
+    if (Tp->getScalarType()->isFP128Ty())
+      return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
 
-  switch (Kind) {
-  case TargetTransformInfo::SK_ExtractSubvector:
-    // ExtractSubvector Index indicates start offset.
+    switch (Kind) {
+    case TargetTransformInfo::SK_ExtractSubvector:
+      // ExtractSubvector Index indicates start offset.
 
-    // Extracting a subvector from first index is a noop.
-    return (Index == 0 ? 0 : NumVectors);
+      // Extracting a subvector from first index is a noop.
+      return (Index == 0 ? 0 : NumVectors);
 
-  case TargetTransformInfo::SK_Broadcast:
-    // Loop vectorizer calls here to figure out the extra cost of
-    // broadcasting a loaded value to all elements of a vector. Since vlrep
-    // loads and replicates with a single instruction, adjust the returned
-    // value.
-    return NumVectors - 1;
+    case TargetTransformInfo::SK_Broadcast:
+      // Loop vectorizer calls here to figure out the extra cost of
+      // broadcasting a loaded value to all elements of a vector. Since vlrep
+      // loads and replicates with a single instruction, adjust the returned
+      // value.
+      return NumVectors - 1;
 
-  default:
+    default:
 
-    // SystemZ supports single instruction permutation / replication.
-    return NumVectors;
+      // SystemZ supports single instruction permutation / replication.
+      return NumVectors;
+    }
   }
 
   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -564,8 +594,9 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
   assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
   assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
           "Packing must reduce size of vector type.");
-  assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
-          "Packing should not change number of elements.");
+  assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
+             cast<FixedVectorType>(DstTy)->getNumElements() &&
+         "Packing should not change number of elements.");
 
   // TODO: Since fp32 is expanded, the extract cost should always be 0.
 
@@ -580,7 +611,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
 
   unsigned Cost = 0;
   unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
-  unsigned VF = SrcTy->getVectorNumElements();
+  unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
   for (unsigned P = 0; P < Log2Diff; ++P) {
     if (NumParts > 1)
       NumParts /= 2;
@@ -642,7 +673,7 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
     // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
     // be either scalar or already vectorized with a same or lesser VF.
     Type *ElTy = OpTy->getScalarType();
-    return VectorType::get(ElTy, VF);
+    return FixedVectorType::get(ElTy, VF);
   }
 
   return nullptr;
@@ -653,8 +684,8 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
 unsigned SystemZTTIImpl::
 getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                               const Instruction *I) {
-  assert (Dst->isVectorTy());
-  unsigned VF = Dst->getVectorNumElements();
+  auto *DstVTy = cast<FixedVectorType>(Dst);
+  unsigned VF = DstVTy->getNumElements();
   unsigned Cost = 0;
   // If we know what the widths of the compared operands, get any cost of
   // converting it to match Dst. Otherwise assume same widths.
@@ -668,14 +699,50 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
 }
 
 int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
+  // FIXME: Can the logic below also be used for these cost kinds?
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+    int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+    return BaseCost == 0 ? BaseCost : 1;
+  }
+
   unsigned DstScalarBits = Dst->getScalarSizeInBits();
   unsigned SrcScalarBits = Src->getScalarSizeInBits();
 
-  if (Src->isVectorTy()) {
-    assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
-    assert (Dst->isVectorTy());
-    unsigned VF = Src->getVectorNumElements();
+  if (!Src->isVectorTy()) {
+    assert (!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+      if (SrcScalarBits >= 32 ||
+          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+        return 1;
+      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+    }
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+      if (ST->hasLoadStoreOnCond2())
+        return 2; // li 0; loc 1
+
+      // This should be extension of a compare i1 result, which is done with
+      // ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type was compared, this costs +1.
+        Cost++;
+      return Cost;
+    }
+  }
+  else if (ST->hasVector()) {
+    auto *SrcVecTy = cast<FixedVectorType>(Src);
+    auto *DstVecTy = cast<FixedVectorType>(Dst);
+    unsigned VF = SrcVecTy->getNumElements();
     unsigned NumDstVectors = getNumVectorRegs(Dst);
     unsigned NumSrcVectors = getNumVectorRegs(Src);
 
@@ -720,7 +787,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       // inserting and extracting the values. Base implementation does not
       // realize float->int gets scalarized.
       unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
-                                             Src->getScalarType());
+                                             Src->getScalarType(), CostKind);
       unsigned TotCost = VF * ScalarCost;
       bool NeedsInserts = true, NeedsExtracts = true;
       // FP128 registers do not get inserted or extracted.
@@ -731,8 +798,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
           (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
         NeedsExtracts = false;
 
-      TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
-      TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
+      TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
+      TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
 
       // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
       if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -743,7 +810,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     if (Opcode == Instruction::FPTrunc) {
       if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
-        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+        return VF /*ldxbr/lexbr*/ +
+               getScalarizationOverhead(DstVecTy, true, false);
       else // double -> float
         return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
     }
@@ -756,40 +824,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
         return VF * 2;
       }
       // -> fp128. VF * lxdb/lxeb + extraction of elements.
-      return VF + getScalarizationOverhead(Src, false, true);
+      return VF + getScalarizationOverhead(SrcVecTy, false, true);
     }
   }
-  else { // Scalar
-    assert (!Dst->isVectorTy());
-
-    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
-      if (SrcScalarBits >= 32 ||
-          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
-        return 1;
-      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
-    }
-    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
-        Src->isIntegerTy(1)) {
-      if (ST->hasLoadStoreOnCond2())
-        return 2; // li 0; loc 1
-
-      // This should be extension of a compare i1 result, which is done with
-      // ipm and a varying sequence of instructions.
-      unsigned Cost = 0;
-      if (Opcode == Instruction::SExt)
-        Cost = (DstScalarBits < 64 ? 3 : 4);
-      if (Opcode == Instruction::ZExt)
-        Cost = 3;
-      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
-      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
-        // If operands of an fp-type was compared, this costs +1.
-        Cost++;
-      return Cost;
-    }
-  }
-
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 
 // Scalar i8 / i16 operations will typically be made after first extending
@@ -805,10 +844,38 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
 }
 
 int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
-  if (ValTy->isVectorTy()) {
-    assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
-    unsigned VF = ValTy->getVectorNumElements();
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+
+  if (!ValTy->isVectorTy()) {
+    switch (Opcode) {
+    case Instruction::ICmp: {
+      // A loaded value compared with 0 with multiple users becomes Load and
+      // Test. The load is then not foldable, so return 0 cost for the ICmp.
+      unsigned ScalarBits = ValTy->getScalarSizeInBits();
+      if (I != nullptr && ScalarBits >= 32)
+        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+                C->getZExtValue() == 0)
+              return 0;
+
+      unsigned Cost = 1;
+      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+        Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
+      return Cost;
+    }
+    case Instruction::Select:
+      if (ValTy->isFloatingPointTy())
+        return 4; // No load on condition for FP - costs a conditional jump.
+      return 1; // Load On Condition / Select Register.
+    }
+  }
+  else if (ST->hasVector()) {
+    unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();
 
     // Called with a compare instruction.
     if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
@@ -856,32 +923,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
     }
   }
-  else { // Scalar
-    switch (Opcode) {
-    case Instruction::ICmp: {
-      // A loaded value compared with 0 with multiple users becomes Load and
-      // Test. The load is then not foldable, so return 0 cost for the ICmp.
-      unsigned ScalarBits = ValTy->getScalarSizeInBits();
-      if (I != nullptr && ScalarBits >= 32)
-        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
-          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
-            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
-                C->getZExtValue() == 0)
-              return 0;
-
-      unsigned Cost = 1;
-      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
-        Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
-      return Cost;
-    }
-    case Instruction::Select:
-      if (ValTy->isFloatingPointTy())
-        return 4; // No load on condition for FP - costs a conditional jump.
-      return 1; // Load On Condition / Select Register.
-    }
-  }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
 }
 
 int SystemZTTIImpl::
@@ -995,9 +1038,14 @@ static bool isBswapIntrinsicCall(const Value *V) {
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
 
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
     // Store the load or its truncated or extended value in FoldedValue.
     const Instruction *FoldedValue = nullptr;
@@ -1058,16 +1106,13 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 // needed for using / defining the vector operands. The SystemZ version does
 // roughly the same but bases the computations on vector permutations
 // instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
-                                               unsigned Factor,
-                                               ArrayRef<unsigned> Indices,
-                                               unsigned Alignment,
-                                               unsigned AddressSpace,
-                                               bool UseMaskForCond,
-                                               bool UseMaskForGaps) {
+int SystemZTTIImpl::getInterleavedMemoryOpCost(
+    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
@@ -1075,7 +1120,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   // Return the ceiling of dividing A by B.
   auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
 
-  unsigned NumElts = VecTy->getVectorNumElements();
+  unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
   assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
   unsigned VF = NumElts / Factor;
   unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
@@ -1125,22 +1170,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
   return -1;
 }
 
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Value *> Args,
-                                          FastMathFlags FMF, unsigned VF) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
-  if (Cost != -1)
-    return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Type *> Tys,
-                                          FastMathFlags FMF,
-                                          unsigned ScalarizationCostPassed) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                          TTI::TargetCostKind CostKind) {
+  int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
   if (Cost != -1)
     return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
-                                      FMF, ScalarizationCostPassed);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
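For context only, and not part of the patch above: a minimal, hypothetical sketch of how a caller could exercise the updated hooks through the public llvm::TargetTransformInfo wrapper, which now threads an explicit TTI::TargetCostKind and describes intrinsics via IntrinsicCostAttributes. The function and variable names below are illustrative, and the constructor/overload shapes assume the LLVM 11-era API.

// Illustrative sketch only -- not taken from the patch. Assumes the
// LLVM 11-era TargetTransformInfo wrapper API.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Every cost query now names an explicit cost kind. As in the hooks above,
// SystemZ models TCK_RecipThroughput in detail and largely falls back to the
// base implementation for the other kinds.
static int exampleCostQueries(const TargetTransformInfo &TTI, Type *I64Ty,
                              Type *V4I32Ty) {
  // Arithmetic cost at reciprocal-throughput granularity.
  int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, I64Ty,
                                           TTI::TCK_RecipThroughput);

  // Intrinsic costs are described by an IntrinsicCostAttributes bundle rather
  // than the old separate (ID, args/types, flags, VF) overloads.
  IntrinsicCostAttributes BswapAttrs(Intrinsic::bswap, V4I32Ty, {V4I32Ty});
  int BswapCost =
      TTI.getIntrinsicInstrCost(BswapAttrs, TTI::TCK_RecipThroughput);

  return MulCost + BswapCost;
}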