aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp25
1 files changed, 15 insertions, 10 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index c05906649f16..f1e1359255bd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -338,6 +338,9 @@ class LowerMatrixIntrinsics {
Value *extractVector(unsigned I, unsigned J, unsigned NumElts,
IRBuilder<> &Builder) const {
Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
+ assert(cast<FixedVectorType>(Vec->getType())->getNumElements() >=
+ NumElts &&
+ "Extracted vector will contain poison values");
return Builder.CreateShuffleVector(
Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
"block");
@@ -1423,13 +1426,13 @@ public:
FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
MatrixTy TileResult;
// Insert in the inner loop header.
- Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+ Builder.SetInsertPoint(TI.KLoop.Header->getTerminator());
// Create PHI nodes for the result columns to accumulate across iterations.
SmallVector<PHINode *, 4> ColumnPhis;
for (unsigned I = 0; I < TileSize; I++) {
auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
- TI.RowLoopHeader->getSingleSuccessor());
+ TI.RowLoop.Header->getSingleSuccessor());
TileResult.addVector(Phi);
ColumnPhis.push_back(Phi);
}
@@ -1438,27 +1441,29 @@ public:
// Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
Builder.SetInsertPoint(InnerBody->getTerminator());
// Load tiles of the operands.
- MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
- {TileSize, TileSize}, EltType, Builder);
- MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
- {TileSize, TileSize}, EltType, Builder);
+ MatrixTy A =
+ loadMatrix(LPtr, {}, false, LShape, TI.RowLoop.Index, TI.KLoop.Index,
+ {TileSize, TileSize}, EltType, Builder);
+ MatrixTy B =
+ loadMatrix(RPtr, {}, false, RShape, TI.KLoop.Index, TI.ColumnLoop.Index,
+ {TileSize, TileSize}, EltType, Builder);
emitMatrixMultiply(TileResult, A, B, Builder, true, false,
getFastMathFlags(MatMul));
// Store result after the inner loop is done.
- Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+ Builder.SetInsertPoint(TI.RowLoop.Latch->getTerminator());
storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
- TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+ TI.RowLoop.Index, TI.ColumnLoop.Index, EltType, Builder);
for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
- ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+ ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.KLoop.Latch);
// Force unrolling of a few iterations of the inner loop, to make sure there
// is enough work per iteration.
// FIXME: The unroller should make this decision directly instead, but
// currently the cost-model is not up to the task.
unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
- addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+ addStringMetadataToLoop(LI->getLoopFor(TI.KLoop.Header),
"llvm.loop.unroll.count", InnerLoopUnrollCount);
}