[Encoding] Delete the allocation support of roundDimsTo field. #20332

Merged · 1 commit · Mar 20, 2025

97 changes: 16 additions & 81 deletions compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp
@@ -254,93 +254,28 @@ Value EncodingAttr::calculateStorageSizeInBytes(Location loc,
                                                 OpBuilder &builder,
                                                 RankedTensorType type,
                                                 ValueRange dynamicDims) const {
-  if (ArrayAttr layoutsAttr = getLayouts()) {
-    if (!llvm::all_of(layoutsAttr.getValue(),
-                      llvm::IsaPred<SerializableEncodingAttrInterface>)) {
-      return nullptr;
-    }
-
-    Value res;
-    for (auto attr :
-         layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
-      Value requestedSize =
-          attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
-      if (!res) {
-        res = requestedSize;
-        continue;
-      }
-      res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
-    }
-    return res;
-  }
-
-  // TODO(hanchung): Deprecate the below logic once EncodingSpecialization pass
-  // is enabled by default. The layouts should be resolved and `roundDimsTo`
-  // will be deprecated.
-  SmallVector<int64_t> paddedShape(type.getShape());
-  SmallVector<Value> paddedDynamicDims(dynamicDims.begin(), dynamicDims.end());
-  ArrayRef<int64_t> roundDimsTo = getRoundDimsToArray();
-  FailureOr<linalg::ContractionDimensions> cDims =
-      getEncodingContractionDims(*this);
-  auto pad = [&](int dim, int value) {
-    std::optional<unsigned> maybeMappedDim = mapDimToOperandIndex(dim);
-    if (!maybeMappedDim) {
-      return;
-    }
-    unsigned mappedDim = maybeMappedDim.value();
-    if (type.isDynamicDim(mappedDim)) {
-      mappedDim = type.getDynamicDimIndex(mappedDim);
-      auto alignment = builder.create<arith::ConstantIndexOp>(loc, value);
-      paddedDynamicDims[mappedDim] = builder.create<arith::CeilDivUIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-      paddedDynamicDims[mappedDim] = builder.create<arith::MulIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-    } else {
-      paddedShape[mappedDim] = llvm::alignTo(paddedShape[mappedDim], value);
-    }
-  };
-  for (auto m : cDims->m) {
-    pad(m, roundDimsTo[0]);
-  }
-  for (auto n : cDims->n) {
-    pad(n, roundDimsTo[1]);
-  }
-  for (auto k : cDims->k) {
-    pad(k, roundDimsTo[2]);
-  }
-
-  constexpr int64_t kNumBitsInByte = 8;
-  unsigned elementBits = getTypeBitWidth(type.getElementType());
-  int64_t numBytesPerElem = 1;
-  if (elementBits > kNumBitsInByte) {
-    numBytesPerElem *= getRoundedElementByteWidth(type.getElementType());
-  }
-
-  int64_t staticCount = numBytesPerElem;
-  for (unsigned i = 0, e = type.getRank(); i < e; ++i) {
-    if (!type.isDynamicDim(i)) {
-      staticCount *= paddedShape[i];
-    }
+  if (!isSerialized()) {
+    return nullptr;
   }
 
-  Value result =
-      builder.create<arith::ConstantIndexOp>(loc, staticCount).getResult();
-  for (auto dim : paddedDynamicDims) {
-    result = builder.create<arith::MulIOp>(loc, result, dim);
+  ArrayAttr layoutsAttr = getLayouts();
+  if (!llvm::all_of(layoutsAttr.getValue(),
+                    llvm::IsaPred<SerializableEncodingAttrInterface>)) {
+    return nullptr;
   }
 
-  // Always pack the elements back-to-back for subtypes.
-  if (elementBits < kNumBitsInByte) {
-    if (kNumBitsInByte % elementBits) {
-      assert(false && "unsupported subtype");
-      return Value();
+  Value res;
+  for (auto attr :
+       layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
+    Value requestedSize =
+        attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
+    if (!res) {
+      res = requestedSize;
+      continue;
    }
-    Value divisor = builder.create<arith::ConstantIndexOp>(
-        loc, kNumBitsInByte / elementBits);
-    result = builder.create<arith::CeilDivUIOp>(loc, result, divisor);
+    res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
  }
 
-  return result;
+  return res;
 }
 
 //===---------------------------------------------------------------------===//
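For context: after this change, `calculateStorageSizeInBytes` only produces a concrete size once the encoding has been serialized into resolved layouts, and the result is the maximum of the sizes requested by each layout, so a single allocation is large enough for whichever layout a target ultimately uses. A minimal caller sketch under that contract — `getEncodedStorageSize` and its dense fallback are illustrative helpers written for this note, not code from the PR:

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"

// Illustrative caller: prefer the serialized size; otherwise fall back to a
// dense byte count. Assumes IREE::Encoding::EncodingAttr is in scope, as in
// the file above.
static mlir::Value getEncodedStorageSize(mlir::Location loc,
                                         mlir::OpBuilder &builder,
                                         mlir::RankedTensorType type,
                                         mlir::ValueRange dynamicDims) {
  using namespace mlir;
  if (auto encoding = llvm::dyn_cast_or_null<IREE::Encoding::EncodingAttr>(
          type.getEncoding())) {
    // Returns nullptr unless isSerialized() holds and every layout
    // implements SerializableEncodingAttrInterface.
    if (Value size = encoding.calculateStorageSizeInBytes(loc, builder, type,
                                                          dynamicDims)) {
      return size;
    }
  }
  // Dense fallback (illustrative only): element byte width times each extent.
  int64_t elemBytes = llvm::divideCeil(type.getElementTypeBitWidth(), 8);
  Value size = builder.create<arith::ConstantIndexOp>(loc, elemBytes);
  unsigned dynIdx = 0;
  for (int64_t dim : type.getShape()) {
    Value extent;
    if (ShapedType::isDynamic(dim)) {
      extent = dynamicDims[dynIdx++];
    } else {
      extent = builder.create<arith::ConstantIndexOp>(loc, dim);
    }
    size = builder.create<arith::MulIOp>(loc, size, extent);
  }
  return size;
}
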
17 changes: 7 additions & 10 deletions compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td
@@ -86,19 +86,16 @@ def EncodingAttr :
     AttrParameter<"EncodingOpTypeAttr", "operand type">:$op_type,
     AttrParameter<"ArrayAttr", "element types of the user's operands">:$element_types,
     OptionalParameter<"ArrayAttr", "Indexing maps of the operation using this tensor">:$user_indexing_maps,
-    // TODO(hanchung): Deprecate the round_dims_to field when we plumb the layouts
-    // field through the whole stack. See https://github.com/iree-org/iree/issues/17924
-    // for details. Note that today we abuse the attribute to carry narrow
-    // matrix information. The end goal is deprecating the field and add a
-    // "iteration_space_size" field to describe the shape. It is useful to
+    // TODO(#19897): Switch round_dims_to to iteration_sizes.
+    // Note that today we abuse the attribute to carry narrow matrix
+    // information. The end goal is deprecating the field and add a
+    // "iteration_sizes" field to describe the shape. It is useful to
     // handle narrow matrix cases.
     OptionalParameter<"DenseArrayAttr", "Values for padding M,N,K dimensions">:$round_dims_to,
     OptionalParameter<"ArrayAttr", "An array of attributes that describes the "
-        "potential layouts on the device. It is an array because a device could "
-        "have several executable targets. Note that it can be any attribute that "
-        "implements EncodingLayoutResolverAttrInterface. The expectation of the field "
-        "is to bridge the logics between host codes and device codes. If an "
-        "attribute does not implement the interface, it could be discarded anytime.">:$layouts
+        "layouts of the encoding. It is an array because a device could have "
+        "multiple target devices. Note that it can be any attribute that "
+        "implements SerializableEncodingAttrInterface.">:$layouts
   );
 
   let builders = [
@@ -181,6 +181,7 @@ def IREEEncoding_SerializableEncodingAttrInterface :
         Returns the storage size (in bytes) for the tensor types with an
         optional encoding. Returns Value() if the size is unknown, i.e., it can
         not be inferred with existing information.
+        Returns nullptr on failure.
       }],
       /*retTy=*/"::mlir::Value",
       /*methodName=*/"calculateStorageSizeInBytes",
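To make that contract concrete, here is a sketch of how a layout attribute might implement this hook for a simple tiled layout: each dimension is rounded up to its inner tile size, and the padded extents are multiplied by the element byte width, matching the `ceildivui`/`muli` sequences checked in the tests below. This is a simplified illustration with invented names (one tile size per dimension, no dim permutation), not IREE's actual implementation:

#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"

// Sketch of a SerializableEncodingAttrInterface-style size computation for a
// tiled layout. Assumes tileSizes has one entry per tensor dimension.
static mlir::Value tiledStorageSizeInBytes(mlir::Location loc,
                                           mlir::OpBuilder &builder,
                                           mlir::RankedTensorType type,
                                           mlir::ValueRange dynamicDims,
                                           llvm::ArrayRef<int64_t> tileSizes) {
  using namespace mlir;
  int64_t elemBytes = llvm::divideCeil(type.getElementTypeBitWidth(), 8);
  Value size = builder.create<arith::ConstantIndexOp>(loc, elemBytes);
  unsigned dynIdx = 0;
  for (auto [dim, tile] : llvm::zip_equal(type.getShape(), tileSizes)) {
    Value extent;
    if (ShapedType::isDynamic(dim)) {
      // padded = ceildiv(extent, tile) * tile, in unsigned arithmetic.
      Value t = builder.create<arith::ConstantIndexOp>(loc, tile);
      extent = dynamicDims[dynIdx++];
      extent = builder.create<arith::CeilDivUIOp>(loc, extent, t);
      extent = builder.create<arith::MulIOp>(loc, extent, t);
    } else {
      // Static dims can be padded at compile time.
      extent =
          builder.create<arith::ConstantIndexOp>(loc, llvm::alignTo(dim, tile));
    }
    size = builder.create<arith::MulIOp>(loc, size, extent);
  }
  return size;
}

A real layout (e.g. the #iree_cpu.vmvx_encoding_layout used in the tests) additionally has to account for innerDimsPos, outerDimsPerm, and missing tiles on broadcasted dimensions.
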
@@ -39,27 +39,6 @@ util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {
@@ -76,24 +55,6 @@ util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: ind
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
 // In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
 // (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
 // N-dimension and dim_0 is K-dimension.
@@ -117,28 +78,6 @@ util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
@@ -158,73 +97,6 @@ util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %ar
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The layout is as the same as the the matmul LHS layout because it broadcasts
-// across the batch dimension. The test is preserved for having the same test
-// suite of non-layouts style encoding. I.e., this is the resolved layout
-// version of the below sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// test.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
 // The M-dimension inner tile is not present because it broadcasts across the
 // M-dimension. We do not need to pack the M-dimension in this case.
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>
@@ -247,29 +119,6 @@ util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layo
 
 // -----
 
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #map = affine_map<(d0, d1, d2) -> (d0, d2)>
 #map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
8 changes: 3 additions & 5 deletions compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp
@@ -89,11 +89,9 @@ Value calculateStorageElementCountInBytes(Location loc,
                                           RankedTensorType shapedType,
                                           ValueRange dynamicDims,
                                           OpBuilder &builder) {
-  Attribute encoding = shapedType.getEncoding();
-  if (auto encodingLayoutAttr =
-          dyn_cast_or_null<IREE::Encoding::SerializableEncodingAttrInterface>(
-              encoding)) {
-    return encodingLayoutAttr.calculateStorageSizeInBytes(
+  if (auto serializableEncodingAttr =
+          IREE::Encoding::getSerializableEncodingAttrInterface(shapedType)) {
+    return serializableEncodingAttr.calculateStorageSizeInBytes(
         loc, builder, shapedType, dynamicDims);
   }
 
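The new helper centralizes the encoding lookup that was previously open-coded here. Its definition is not part of this diff; a plausible minimal shape, assuming it only wraps the cast (the real helper in the Encoding dialect may do more):

// Sketch only: resolve a tensor type's encoding to the serializable
// interface, or return a null interface when the tensor has no encoding or
// the encoding does not implement it.
static IREE::Encoding::SerializableEncodingAttrInterface
getSerializableEncodingAttrInterface(mlir::RankedTensorType type) {
  return llvm::dyn_cast_or_null<
      IREE::Encoding::SerializableEncodingAttrInterface>(type.getEncoding());
}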