Commit 6c8a1af
[Encoding] Delete the allocation support of the roundDimsTo field.

The field was introduced to plumb data-tiling support through the stack in a target-independent way. IREE can now represent layouts and resolve them in Stream, so this path is no longer needed. The attribute itself is preserved because CodeGen currently abuses the field to get the iteration domain sizes, which is an important feature for CPU targets.

Signed-off-by: hanhanW <hanhan0912@gmail.com>
1 parent 43753b3
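To make "resolve layouts in Stream" concrete, here is a minimal sketch of the same encoding before and after layout resolution, assembled from attributes that appear verbatim in the tests below; treat it as an illustration rather than the output of a specific pass:

  // Unresolved, target-independent form: round_dims_to is the only sizing hint.
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>

  // Resolved form: layouts carries a serialized, target-specific layout, and
  // allocation sizes are computed from it alone.
  #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>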

5 files changed (+27 -247 lines)

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp (+16 -81)

@@ -254,93 +254,28 @@ Value EncodingAttr::calculateStorageSizeInBytes(Location loc,
                                                 OpBuilder &builder,
                                                 RankedTensorType type,
                                                 ValueRange dynamicDims) const {
-  if (ArrayAttr layoutsAttr = getLayouts()) {
-    if (!llvm::all_of(layoutsAttr.getValue(),
-                      llvm::IsaPred<SerializableEncodingAttrInterface>)) {
-      return nullptr;
-    }
-
-    Value res;
-    for (auto attr :
-         layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
-      Value requestedSize =
-          attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
-      if (!res) {
-        res = requestedSize;
-        continue;
-      }
-      res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
-    }
-    return res;
-  }
-
-  // TODO(hanchung): Deprecate the below logic once EncodingSpecialization pass
-  // is enabled by default. The layouts should be resolved and `roundDimsTo`
-  // will be deprecated.
-  SmallVector<int64_t> paddedShape(type.getShape());
-  SmallVector<Value> paddedDynamicDims(dynamicDims.begin(), dynamicDims.end());
-  ArrayRef<int64_t> roundDimsTo = getRoundDimsToArray();
-  FailureOr<linalg::ContractionDimensions> cDims =
-      getEncodingContractionDims(*this);
-  auto pad = [&](int dim, int value) {
-    std::optional<unsigned> maybeMappedDim = mapDimToOperandIndex(dim);
-    if (!maybeMappedDim) {
-      return;
-    }
-    unsigned mappedDim = maybeMappedDim.value();
-    if (type.isDynamicDim(mappedDim)) {
-      mappedDim = type.getDynamicDimIndex(mappedDim);
-      auto alignment = builder.create<arith::ConstantIndexOp>(loc, value);
-      paddedDynamicDims[mappedDim] = builder.create<arith::CeilDivUIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-      paddedDynamicDims[mappedDim] = builder.create<arith::MulIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-    } else {
-      paddedShape[mappedDim] = llvm::alignTo(paddedShape[mappedDim], value);
-    }
-  };
-  for (auto m : cDims->m) {
-    pad(m, roundDimsTo[0]);
-  }
-  for (auto n : cDims->n) {
-    pad(n, roundDimsTo[1]);
-  }
-  for (auto k : cDims->k) {
-    pad(k, roundDimsTo[2]);
-  }
-
-  constexpr int64_t kNumBitsInByte = 8;
-  unsigned elementBits = getTypeBitWidth(type.getElementType());
-  int64_t numBytesPerElem = 1;
-  if (elementBits > kNumBitsInByte) {
-    numBytesPerElem *= getRoundedElementByteWidth(type.getElementType());
-  }
-
-  int64_t staticCount = numBytesPerElem;
-  for (unsigned i = 0, e = type.getRank(); i < e; ++i) {
-    if (!type.isDynamicDim(i)) {
-      staticCount *= paddedShape[i];
-    }
+  if (!isSerialized()) {
+    return nullptr;
   }
 
-  Value result =
-      builder.create<arith::ConstantIndexOp>(loc, staticCount).getResult();
-  for (auto dim : paddedDynamicDims) {
-    result = builder.create<arith::MulIOp>(loc, result, dim);
+  ArrayAttr layoutsAttr = getLayouts();
+  if (!llvm::all_of(layoutsAttr.getValue(),
+                    llvm::IsaPred<SerializableEncodingAttrInterface>)) {
+    return nullptr;
   }
 
-  // Always pack the elements back-to-back for subtypes.
-  if (elementBits < kNumBitsInByte) {
-    if (kNumBitsInByte % elementBits) {
-      assert(false && "unsupported subtype");
-      return Value();
+  Value res;
+  for (auto attr :
+       layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
+    Value requestedSize =
+        attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
+    if (!res) {
+      res = requestedSize;
+      continue;
     }
-    Value divisor = builder.create<arith::ConstantIndexOp>(
-        loc, kNumBitsInByte / elementBits);
-    result = builder.create<arith::CeilDivUIOp>(loc, result, divisor);
+    res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
   }
-
-  return result;
+  return res;
 }
 
 //===---------------------------------------------------------------------===//
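For contrast, a minimal example of an encoding that this method no longer sizes, copied from the tests deleted below (the comment paraphrases the new early-return, assuming isSerialized() checks that layouts have been resolved):

  #map = affine_map<(d0, d1, d2) -> (d0, d2)>
  #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
  #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
  // No layouts field: calculateStorageSizeInBytes now returns nullptr here
  // instead of emitting the ceildivui/muli padding arithmetic it used to
  // derive from round_dims_to.
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>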

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td (+7 -10)

@@ -86,19 +86,16 @@ def EncodingAttr :
     AttrParameter<"EncodingOpTypeAttr", "operand type">:$op_type,
     AttrParameter<"ArrayAttr", "element types of the user's operands">:$element_types,
     OptionalParameter<"ArrayAttr", "Indexing maps of the operation using this tensor">:$user_indexing_maps,
-    // TODO(hanchung): Deprecate the round_dims_to field when we plumb the layouts
-    // field through the whole stack. See https://github.com/iree-org/iree/issues/17924
-    // for details. Note that today we abuse the attribute to carry narrow
-    // matrix information. The end goal is deprecating the field and add a
-    // "iteration_space_size" field to describe the shape. It is useful to
+    // TODO(#19897): Switch round_dims_to to iteration_sizes.
+    // Note that today we abuse the attribute to carry narrow matrix
+    // information. The end goal is deprecating the field and add a
+    // "iteration_sizes" field to describe the shape. It is useful to
     // handle narrow matrix cases.
     OptionalParameter<"DenseArrayAttr", "Values for padding M,N,K dimensions">:$round_dims_to,
     OptionalParameter<"ArrayAttr", "An array of attributes that describes the "
-        "potential layouts on the device. It is an array because a device could "
-        "have several executable targets. Note that it can be any attribute that "
-        "implements EncodingLayoutResolverAttrInterface. The expectation of the field "
-        "is to bridge the logics between host codes and device codes. If an "
-        "attribute does not implement the interface, it could be discarded anytime.">:$layouts
+        "layouts of the encoding. It is an array because a device could have "
+        "multiple target device. Note that it can be any attribute that "
+        "implements SerializableEncodingAttrInterface.">:$layouts
   );
 
   let builders = [

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingInterfaces.td (+1)

@@ -181,6 +181,7 @@ def IREEEncoding_SerializableEncodingAttrInterface :
         Returns the storage size (in bytes) for the tensor types with an
         optional encoding. Returns Value() if the size is unknown, i.e., it can
         not be inferred with existing information.
+        Returns nullptr on failure.
       }],
       /*retTy=*/"::mlir::Value",
      /*methodName=*/"calculateStorageSizeInBytes",

compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_encoding.mlir (-151)

@@ -39,27 +39,6 @@ util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {

@@ -76,24 +55,6 @@ util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: ind
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
 // In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
 // (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
 // N-dimension and dim_0 is K-dimension.

@@ -117,28 +78,6 @@ util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {

@@ -158,73 +97,6 @@ util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %ar
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The layout is as the same as the the matmul LHS layout because it broadcasts
-// across the batch dimension. The test is preserved for having the same test
-// suite of non-layouts style encoding. I.e., this is the resolved layout
-// version of the below sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// test.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 // The M-dimension inner tile is not present because it broadcasts across the
 // M-dimension. We do not need to pack the M-dimension in this case.
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>

@@ -247,29 +119,6 @@ util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layo
 
 // -----
 
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #map = affine_map<(d0, d1, d2) -> (d0, d2)>
 #map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>

compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp (+3 -5)

@@ -89,11 +89,9 @@ Value calculateStorageElementCountInBytes(Location loc,
                                           RankedTensorType shapedType,
                                           ValueRange dynamicDims,
                                           OpBuilder &builder) {
-  Attribute encoding = shapedType.getEncoding();
-  if (auto encodingLayoutAttr =
-          dyn_cast_or_null<IREE::Encoding::SerializableEncodingAttrInterface>(
-              encoding)) {
-    return encodingLayoutAttr.calculateStorageSizeInBytes(
+  if (auto serializableEncodingAttr =
+          IREE::Encoding::getSerializableEncodingAttrInterface(shapedType)) {
+    return serializableEncodingAttr.calculateStorageSizeInBytes(
         loc, builder, shapedType, dynamicDims);
   }
 
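Storage-size computations like this one back queries such as stream.tensor.sizeof, as exercised by the tests above. A minimal usage sketch with a resolved layout (the function name is illustrative; the attributes are copied from the retained tests):

  #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
  util.func public @sizeof_example(%arg0: index, %arg1: index) -> index {
    %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
    util.return %0 : index
  }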
