diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-27 19:50:45 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-27 19:50:54 +0000 |
| commit | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (patch) | |
| tree | 041e72e32710b1e742516d8c9f1575bf0116d3e3 /llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | |
| parent | 4b4fe385e49bd883fd183b5f21c1ea486c722e61 (diff) | |
vendor/llvm-project/llvmorg-15-init-17827-gd77882e66779
vendor/llvm-project/llvmorg-15-init-17826-g1f8ae9d7e7e4
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 25 |
1 file changed, 15 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index c05906649f16..f1e1359255bd 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -338,6 +338,9 @@ class LowerMatrixIntrinsics {
     Value *extractVector(unsigned I, unsigned J, unsigned NumElts,
                          IRBuilder<> &Builder) const {
       Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
+      assert(cast<FixedVectorType>(Vec->getType())->getNumElements() >=
+                 NumElts &&
+             "Extracted vector will contain poison values");
       return Builder.CreateShuffleVector(
           Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
           "block");
@@ -1423,13 +1426,13 @@ public:
         FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
     MatrixTy TileResult;
     // Insert in the inner loop header.
-    Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+    Builder.SetInsertPoint(TI.KLoop.Header->getTerminator());
     // Create PHI nodes for the result columns to accumulate across iterations.
     SmallVector<PHINode *, 4> ColumnPhis;
     for (unsigned I = 0; I < TileSize; I++) {
       auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
       Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
-                       TI.RowLoopHeader->getSingleSuccessor());
+                       TI.RowLoop.Header->getSingleSuccessor());
       TileResult.addVector(Phi);
       ColumnPhis.push_back(Phi);
     }
@@ -1438,27 +1441,29 @@ public:
     //   Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
     Builder.SetInsertPoint(InnerBody->getTerminator());
     // Load tiles of the operands.
-    MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
-                            {TileSize, TileSize}, EltType, Builder);
-    MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
-                            {TileSize, TileSize}, EltType, Builder);
+    MatrixTy A =
+        loadMatrix(LPtr, {}, false, LShape, TI.RowLoop.Index, TI.KLoop.Index,
+                   {TileSize, TileSize}, EltType, Builder);
+    MatrixTy B =
+        loadMatrix(RPtr, {}, false, RShape, TI.KLoop.Index, TI.ColumnLoop.Index,
+                   {TileSize, TileSize}, EltType, Builder);
     emitMatrixMultiply(TileResult, A, B, Builder, true, false,
                        getFastMathFlags(MatMul));
     // Store result after the inner loop is done.
-    Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+    Builder.SetInsertPoint(TI.RowLoop.Latch->getTerminator());
     storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
                 Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
-                TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+                TI.RowLoop.Index, TI.ColumnLoop.Index, EltType, Builder);
 
     for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
-      ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+      ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.KLoop.Latch);
 
     // Force unrolling of a few iterations of the inner loop, to make sure there
     // is enough work per iteration.
     // FIXME: The unroller should make this decision directly instead, but
     // currently the cost-model is not up to the task.
     unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
-    addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+    addStringMetadataToLoop(LI->getLoopFor(TI.KLoop.Header),
                             "llvm.loop.unroll.count", InnerLoopUnrollCount);
   }
 
