diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index b6f6167d1dfb3..64720bfe6cf50 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -481,16 +481,16 @@ struct MemoryCounterWaitOpLowering if (chipset.majorVersion >= 12) { Location loc = op.getLoc(); if (std::optional ds = adaptor.getDs()) - rewriter.create(loc, *ds); + ROCDL::WaitDscntOp::create(rewriter, loc, *ds); if (std::optional load = adaptor.getLoad()) - rewriter.create(loc, *load); + ROCDL::WaitLoadcntOp::create(rewriter, loc, *load); if (std::optional store = adaptor.getStore()) - rewriter.create(loc, *store); + ROCDL::WaitStorecntOp::create(rewriter, loc, *store); if (std::optional exp = adaptor.getExp()) - rewriter.create(loc, *exp); + ROCDL::WaitExpcntOp::create(rewriter, loc, *exp); rewriter.eraseOp(op); return success(); diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractToNeonPatterns.cpp b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractToNeonPatterns.cpp index 45b896d10834a..1aa8064a80ff9 100644 --- a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractToNeonPatterns.cpp +++ b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractToNeonPatterns.cpp @@ -145,8 +145,8 @@ class VectorContractRewriter { return rewriter.createOrFold(loc, acc.getType(), acc, lhs, rhs); case MMLA::Bfloat: - return rewriter.create(loc, acc.getType(), acc, lhs, - rhs); + return arm_neon::BfmmlaOp::create(rewriter, loc, acc.getType(), acc, lhs, + rhs); case MMLA::Nop: llvm_unreachable("Uninitialized operation type"); } @@ -226,8 +226,9 @@ class VectorContractRewriter { // Initial accumulator for the final result. This is the un-tiled result if // tiling is done. - Value result = rewriter.create( - loc, op.getResultType(), rewriter.getZeroAttr(op.getResultType())); + Value result = + arith::ConstantOp::create(rewriter, loc, op.getResultType(), + rewriter.getZeroAttr(op.getResultType())); SmallVector loopOrder = {0, 1}; if (iterationBounds.size() == 3) @@ -263,8 +264,9 @@ class VectorContractRewriter { if (dimM == 1) { auto expandRowVector = [&](Value tiledOperand, VectorType expandedTypeType) { - auto emptyOperand = rewriter.create( - loc, expandedTypeType, rewriter.getZeroAttr(expandedTypeType)); + auto emptyOperand = + arith::ConstantOp::create(rewriter, loc, expandedTypeType, + rewriter.getZeroAttr(expandedTypeType)); SmallVector offsets( cast(emptyOperand.getType()).getRank(), 0); SmallVector strides( @@ -280,8 +282,8 @@ class VectorContractRewriter { // using the instruction for unsigned by signed multiplication with // reversed operands. if (swapOperands) - tiledAcc = rewriter.create( - loc, tiledAcc, ArrayRef({1, 0})); + tiledAcc = vector::TransposeOp::create(rewriter, loc, tiledAcc, + ArrayRef({1, 0})); // Collapse tiled operands to 1D vectors required by the ArmNeon ops auto collapsedLhs = rewriter.createOrFold( @@ -309,8 +311,8 @@ class VectorContractRewriter { // Because of the reversed operands the result is obtained transposed. // Transpose it back, if (swapOperands) - tiledRes = rewriter.create( - loc, tiledRes, ArrayRef({1, 0})); + tiledRes = vector::TransposeOp::create(rewriter, loc, tiledRes, + ArrayRef({1, 0})); // With vecmat, only one row of tiled ACC can be inserted into the final // result diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractToSVEPatterns.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractToSVEPatterns.cpp index fcfeb9c519157..35b0bd1f41346 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractToSVEPatterns.cpp +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractToSVEPatterns.cpp @@ -214,13 +214,13 @@ Value VectorContractRewriter::createMMLA(PatternRewriter &rewriter, switch (mmlaOp) { case MMLA::SignedInt: - return rewriter.create(loc, resTy, acc, lhs, rhs); + return arm_sve::SmmlaOp::create(rewriter, loc, resTy, acc, lhs, rhs); case MMLA::UnsignedInt: - return rewriter.create(loc, resTy, acc, lhs, rhs); + return arm_sve::UmmlaOp::create(rewriter, loc, resTy, acc, lhs, rhs); case MMLA::MixedInt: - return rewriter.create(loc, resTy, acc, lhs, rhs); + return arm_sve::UsmmlaOp::create(rewriter, loc, resTy, acc, lhs, rhs); case MMLA::Bfloat: - return rewriter.create(loc, resTy, acc, lhs, rhs); + return arm_sve::BfmmlaOp::create(rewriter, loc, resTy, acc, lhs, rhs); default: llvm_unreachable("Uninitialized operation kind"); } @@ -316,62 +316,63 @@ Value VectorContractRewriter::lower(vector::ContractionOp op, for (int64_t i = 0; i < M; i += 2) { // Extract two consecutive rows of the LHS tile. auto r0 = - rewriter.create(loc, lhs, ArrayRef{i}); + vector::ExtractOp::create(rewriter, loc, lhs, ArrayRef{i}); auto r1 = - rewriter.create(loc, lhs, ArrayRef{i + 1}); + vector::ExtractOp::create(rewriter, loc, lhs, ArrayRef{i + 1}); // Concatenate to obtain a 2 x K x flattened sub-tile. SmallVector shuffleIdx(2 * K); std::iota(shuffleIdx.begin(), shuffleIdx.end(), 0); - auto t = rewriter.create(loc, r0, r1, shuffleIdx); + auto t = vector::ShuffleOp::create(rewriter, loc, r0, r1, shuffleIdx); // Turn it into a scalable vector. - auto s = rewriter.create( - loc, t, rewriter.create(loc, flatLhsType), 0); + auto s = vector::ScalableInsertOp::create( + rewriter, loc, t, ub::PoisonOp::create(rewriter, loc, flatLhsType), 0); // Replicate the sub-tile VSCALE times to fill the entire vector. - auto r = rewriter.create(loc, s, 0); + auto r = arm_sve::DupQLaneOp::create(rewriter, loc, s, 0); lhsTile.push_back(r); } // "Flatten" the RHS tile from <[N]xK> to <[N*K]>. - auto rhs = rewriter.create(this->rhs.getLoc(), - flatRhsTileType, this->rhs); + auto rhs = vector::ShapeCastOp::create(rewriter, this->rhs.getLoc(), + flatRhsTileType, this->rhs); // Extract the RHS sub-tiles with logical shape . SmallVector rhsTile; for (int64_t j = 0; j < N; j += 2) - rhsTile.push_back(rewriter.create( - loc, flatRhsType, rhs, j * K)); + rhsTile.push_back(vector::ScalableExtractOp::create( + rewriter, loc, flatRhsType, rhs, j * K)); // Extract and pack the ACC sub-tiles. SmallVector accTile; for (int64_t i = 0; i < M; i += 2) { // Extract two consecutive rows of the accumulator tile. - auto r0 = rewriter.create(loc, op.getAcc(), - ArrayRef{i}); - auto r1 = rewriter.create(loc, op.getAcc(), - ArrayRef{i + 1}); + auto r0 = vector::ExtractOp::create(rewriter, loc, op.getAcc(), + ArrayRef{i}); + auto r1 = vector::ExtractOp::create(rewriter, loc, op.getAcc(), + ArrayRef{i + 1}); Value accTileVec; if (swapOperands) { // We are performing the operation with swapped LHS and RHS we need to // transpose each individual 2x2 tile of the accumulator and (later) the // final result. - accTileVec = rewriter.create(loc, r0, r1); + accTileVec = vector::InterleaveOp::create(rewriter, loc, r0, r1); } else { // Bitcast accumulator rows to double-width integer elements, so // subsequent interleave/deinterleave work on pairs of elements. - auto r0I64 = rewriter.create(loc, accRow64Ty, r0); - auto r1I64 = rewriter.create(loc, accRow64Ty, r1); + auto r0I64 = vector::BitCastOp::create(rewriter, loc, accRow64Ty, r0); + auto r1I64 = vector::BitCastOp::create(rewriter, loc, accRow64Ty, r1); // Interleave the rows, effectively flattening each 2x2 tile into 4 // consecutive elements. - auto intrI64 = rewriter.create(loc, r0I64, r1I64); + auto intrI64 = vector::InterleaveOp::create(rewriter, loc, r0I64, r1I64); // Bitcast back to original element type. - accTileVec = rewriter.create(loc, accRowX2Ty, intrI64); + accTileVec = + vector::BitCastOp::create(rewriter, loc, accRowX2Ty, intrI64); } // Extract ACC sub-tiles. for (int64_t j = 0; j < N; j += 2) - accTile.push_back(rewriter.create( - loc, flatAccType, accTileVec, j * 2)); + accTile.push_back(vector::ScalableExtractOp::create( + rewriter, loc, flatAccType, accTileVec, j * 2)); } // Emit sub-tile matrix multiplications. @@ -384,13 +385,13 @@ Value VectorContractRewriter::lower(vector::ContractionOp op, } // Unpack the OUT sub-tiles and insert into the result. - Value result = rewriter.create(loc, op.getResultType()); + Value result = ub::PoisonOp::create(rewriter, loc, op.getResultType()); for (int64_t i = 0; i < M / 2; ++i) { // Collect a number of sub-tiles in a row. - Value row = rewriter.create(loc, accRowX2Ty); + Value row = ub::PoisonOp::create(rewriter, loc, accRowX2Ty); for (int64_t j = 0; j < N / 2; ++j) - row = rewriter.create( - loc, outTile[i * N / 2 + j], row, j * 4); + row = vector::ScalableInsertOp::create( + rewriter, loc, outTile[i * N / 2 + j], row, j * 4); // Unpack the row to obtain two rows of the output. If we have the out // sub-tiles transposed we obtain two consecutive output rows by @@ -398,22 +399,22 @@ Value VectorContractRewriter::lower(vector::ContractionOp op, // Otherwise, the interleave is by pairs. Value out0, out1; if (swapOperands) { - auto tmp = rewriter.create(loc, row); + auto tmp = vector::DeinterleaveOp::create(rewriter, loc, row); out0 = tmp.getRes1(); out1 = tmp.getRes2(); } else { // Deinterleave by pairs. - auto row64 = rewriter.create(loc, accRowX264Ty, row); - auto deintr64 = rewriter.create(loc, row64); + auto row64 = vector::BitCastOp::create(rewriter, loc, accRowX264Ty, row); + auto deintr64 = vector::DeinterleaveOp::create(rewriter, loc, row64); // Bitcast back into original element type and insert into the result. - out0 = - rewriter.create(loc, accRowTy, deintr64.getRes1()); - out1 = - rewriter.create(loc, accRowTy, deintr64.getRes2()); + out0 = vector::BitCastOp::create(rewriter, loc, accRowTy, + deintr64.getRes1()); + out1 = vector::BitCastOp::create(rewriter, loc, accRowTy, + deintr64.getRes2()); } - result = rewriter.create(loc, out0, result, i * 2); - result = rewriter.create(loc, out1, result, i * 2 + 1); + result = vector::InsertOp::create(rewriter, loc, out0, result, i * 2); + result = vector::InsertOp::create(rewriter, loc, out1, result, i * 2 + 1); } return result; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 78c6bd13aba3a..00b33d8498cd2 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2198,8 +2198,8 @@ vectorizeAsLinalgContraction(RewriterBase &rewriter, VectorizationState &state, } // Create contraction. - Operation *contractOp = rewriter.create( - loc, /*lhs=*/vecOperands[0], + Operation *contractOp = vector::ContractionOp::create( + rewriter, loc, /*lhs=*/vecOperands[0], /*rhs=*/vecOperands[1], /*acc=*/vecOperands[2], linalgOp.getIndexingMaps(), rewriter.getArrayAttr(iterAttrs), *maybeKind); contractOp = state.maskOperation(rewriter, contractOp, linalgOp); diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp index 97fe3cb5b4705..5af46a48f124f 100644 --- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp +++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp @@ -237,8 +237,8 @@ LogicalResult resolveSourceIndicesExpandShape( llvm::map_to_vector(group, [&](int64_t d) { return destShape[d]; }); SmallVector groupIndices = llvm::map_to_vector(group, [&](int64_t d) { return indices[d]; }); - Value collapsedIndex = rewriter.create( - loc, groupIndices, groupBasis, /*disjoint=*/startsInbounds); + Value collapsedIndex = affine::AffineLinearizeIndexOp::create( + rewriter, loc, groupIndices, groupBasis, /*disjoint=*/startsInbounds); sourceIndices.push_back(collapsedIndex); } return success(); @@ -250,8 +250,8 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter, ValueRange indices, SmallVectorImpl &sourceIndices) { // Note: collapse_shape requires a strided memref, we can do this. - auto metadata = rewriter.create( - loc, collapseShapeOp.getSrc()); + auto metadata = memref::ExtractStridedMetadataOp::create( + rewriter, loc, collapseShapeOp.getSrc()); SmallVector sourceSizes = metadata.getConstifiedMixedSizes(); for (auto [index, group] : llvm::zip(indices, collapseShapeOp.getReassociationIndices())) { @@ -265,8 +265,8 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter, SmallVector basis = llvm::map_to_vector(group, [&](int64_t d) { return sourceSizes[d]; }); - auto delinearize = rewriter.create( - loc, index, basis, /*hasOuterBound=*/true); + auto delinearize = affine::AffineDelinearizeIndexOp::create( + rewriter, loc, index, basis, /*hasOuterBound=*/true); llvm::append_range(sourceIndices, delinearize.getResults()); } if (collapseShapeOp.getReassociationIndices().empty()) { diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 229a289838c60..850f70cca288f 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -207,7 +207,7 @@ struct WgToSgCreateNdOp : public OpConversionPattern { // Subtract startOfRange from the original subgroup id to get the adjusted // sg id Value startOfRangeVal = - rewriter.create(loc, startOfRange); + arith::ConstantIndexOp::create(rewriter, loc, startOfRange); adjustedSgId = rewriter.createOrFold(loc, linearSgId, startOfRangeVal); } @@ -431,8 +431,8 @@ struct WgToSgVectorBroadcastOp SmallVector newBroadcastOps; for (auto operand : adaptor.getOperands().front()) { - auto newBroadcast = rewriter.create( - op.getLoc(), newResultType, operand); + auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(), + newResultType, operand); xegpu::setLayoutAttr(newBroadcast->getResult(0), layout.dropSgLayoutAndData()); newBroadcastOps.push_back(newBroadcast.getResult()); @@ -563,8 +563,8 @@ struct WgToSgConvertLayoutOp if (input && target) { // keep the ConvertLayoutOp for rest fields, e.g., inst_data. for (auto [i, src] : llvm::enumerate(adaptor.getSource())) { - auto newOp = rewriter.create( - op.getLoc(), src.getType(), src, input, target); + auto newOp = xegpu::ConvertLayoutOp::create( + rewriter, op.getLoc(), src.getType(), src, input, target); newOps[i] = newOp; } }