Commit 6c8a1af
[Encoding] Delete the allocation support of the roundDimsTo field.

The field was introduced to plumb data-tiling support through the stack in a target-independent way. IREE can now represent layouts and resolve them in Stream, so this path is no longer needed. The attribute itself is preserved because CodeGen currently abuses the field to get the iteration domain sizes, which is an important feature for CPU targets.

Signed-off-by: hanhanW <hanhan0912@gmail.com>
1 parent 43753b3
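To make "resolve layouts in Stream" concrete, here is a minimal sketch of the same encoding before and after layout resolution, assembled from attributes that appear verbatim in the tests below; treat it as an illustration rather than the output of a specific pass:

  // Unresolved, target-independent form: round_dims_to is the only sizing hint.
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>

  // Resolved form: layouts carries a serialized, target-specific layout, and
  // allocation sizes are computed from it alone.
  #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>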

5 files changed (+27 -247 lines)

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp (+16 -81)

@@ -254,93 +254,28 @@ Value EncodingAttr::calculateStorageSizeInBytes(Location loc,
                                                 OpBuilder &builder,
                                                 RankedTensorType type,
                                                 ValueRange dynamicDims) const {
-  if (ArrayAttr layoutsAttr = getLayouts()) {
-    if (!llvm::all_of(layoutsAttr.getValue(),
-                      llvm::IsaPred<SerializableEncodingAttrInterface>)) {
-      return nullptr;
-    }
-
-    Value res;
-    for (auto attr :
-         layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
-      Value requestedSize =
-          attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
-      if (!res) {
-        res = requestedSize;
-        continue;
-      }
-      res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
-    }
-    return res;
-  }
-
-  // TODO(hanchung): Deprecate the below logic once EncodingSpecialization pass
-  // is enabled by default. The layouts should be resolved and `roundDimsTo`
-  // will be deprecated.
-  SmallVector<int64_t> paddedShape(type.getShape());
-  SmallVector<Value> paddedDynamicDims(dynamicDims.begin(), dynamicDims.end());
-  ArrayRef<int64_t> roundDimsTo = getRoundDimsToArray();
-  FailureOr<linalg::ContractionDimensions> cDims =
-      getEncodingContractionDims(*this);
-  auto pad = [&](int dim, int value) {
-    std::optional<unsigned> maybeMappedDim = mapDimToOperandIndex(dim);
-    if (!maybeMappedDim) {
-      return;
-    }
-    unsigned mappedDim = maybeMappedDim.value();
-    if (type.isDynamicDim(mappedDim)) {
-      mappedDim = type.getDynamicDimIndex(mappedDim);
-      auto alignment = builder.create<arith::ConstantIndexOp>(loc, value);
-      paddedDynamicDims[mappedDim] = builder.create<arith::CeilDivUIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-      paddedDynamicDims[mappedDim] = builder.create<arith::MulIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-    } else {
-      paddedShape[mappedDim] = llvm::alignTo(paddedShape[mappedDim], value);
-    }
-  };
-  for (auto m : cDims->m) {
-    pad(m, roundDimsTo[0]);
-  }
-  for (auto n : cDims->n) {
-    pad(n, roundDimsTo[1]);
-  }
-  for (auto k : cDims->k) {
-    pad(k, roundDimsTo[2]);
-  }
-
-  constexpr int64_t kNumBitsInByte = 8;
-  unsigned elementBits = getTypeBitWidth(type.getElementType());
-  int64_t numBytesPerElem = 1;
-  if (elementBits > kNumBitsInByte) {
-    numBytesPerElem *= getRoundedElementByteWidth(type.getElementType());
-  }
-
-  int64_t staticCount = numBytesPerElem;
-  for (unsigned i = 0, e = type.getRank(); i < e; ++i) {
-    if (!type.isDynamicDim(i)) {
-      staticCount *= paddedShape[i];
-    }
+  if (!isSerialized()) {
+    return nullptr;
   }
 
-  Value result =
-      builder.create<arith::ConstantIndexOp>(loc, staticCount).getResult();
-  for (auto dim : paddedDynamicDims) {
-    result = builder.create<arith::MulIOp>(loc, result, dim);
+  ArrayAttr layoutsAttr = getLayouts();
+  if (!llvm::all_of(layoutsAttr.getValue(),
+                    llvm::IsaPred<SerializableEncodingAttrInterface>)) {
+    return nullptr;
   }
 
-  // Always pack the elements back-to-back for subtypes.
-  if (elementBits < kNumBitsInByte) {
-    if (kNumBitsInByte % elementBits) {
-      assert(false && "unsupported subtype");
-      return Value();
+  Value res;
+  for (auto attr :
+       layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
+    Value requestedSize =
+        attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
+    if (!res) {
+      res = requestedSize;
+      continue;
     }
-    Value divisor = builder.create<arith::ConstantIndexOp>(
-        loc, kNumBitsInByte / elementBits);
-    result = builder.create<arith::CeilDivUIOp>(loc, result, divisor);
+    res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
   }
-
-  return result;
+  return res;
 }
 
 //===---------------------------------------------------------------------===//
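For contrast, a minimal example of an encoding that this method no longer sizes, copied from the tests deleted below (the comment paraphrases the new early-return, assuming isSerialized() checks that layouts have been resolved):

  #map = affine_map<(d0, d1, d2) -> (d0, d2)>
  #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
  #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
  // No layouts field: calculateStorageSizeInBytes now returns nullptr here
  // instead of emitting the ceildivui/muli padding arithmetic it used to
  // derive from round_dims_to.
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>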

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td (+7 -10)

@@ -86,19 +86,16 @@ def EncodingAttr :
     AttrParameter<"EncodingOpTypeAttr", "operand type">:$op_type,
     AttrParameter<"ArrayAttr", "element types of the user's operands">:$element_types,
     OptionalParameter<"ArrayAttr", "Indexing maps of the operation using this tensor">:$user_indexing_maps,
-    // TODO(hanchung): Deprecate the round_dims_to field when we plumb the layouts
-    // field through the whole stack. See https://github.com/iree-org/iree/issues/17924
-    // for details. Note that today we abuse the attribute to carry narrow
-    // matrix information. The end goal is deprecating the field and add a
-    // "iteration_space_size" field to describe the shape. It is useful to
+    // TODO(#19897): Switch round_dims_to to iteration_sizes.
+    // Note that today we abuse the attribute to carry narrow matrix
+    // information. The end goal is deprecating the field and add a
+    // "iteration_sizes" field to describe the shape. It is useful to
     // handle narrow matrix cases.
     OptionalParameter<"DenseArrayAttr", "Values for padding M,N,K dimensions">:$round_dims_to,
     OptionalParameter<"ArrayAttr", "An array of attributes that describes the "
-        "potential layouts on the device. It is an array because a device could "
-        "have several executable targets. Note that it can be any attribute that "
-        "implements EncodingLayoutResolverAttrInterface. The expectation of the field "
-        "is to bridge the logics between host codes and device codes. If an "
-        "attribute does not implement the interface, it could be discarded anytime.">:$layouts
+        "layouts of the encoding. It is an array because a device could have "
+        "multiple target device. Note that it can be any attribute that "
+        "implements SerializableEncodingAttrInterface.">:$layouts
   );
 
   let builders = [

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingInterfaces.td (+1)

@@ -181,6 +181,7 @@ def IREEEncoding_SerializableEncodingAttrInterface :
         Returns the storage size (in bytes) for the tensor types with an
         optional encoding. Returns Value() if the size is unknown, i.e., it can
         not be inferred with existing information.
+        Returns nullptr on failure.
       }],
       /*retTy=*/"::mlir::Value",
      /*methodName=*/"calculateStorageSizeInBytes",

compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_encoding.mlir (-151)

@@ -39,27 +39,6 @@ util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {

@@ -76,24 +55,6 @@ util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: ind
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
 // In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
 // (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
 // N-dimension and dim_0 is K-dimension.

@@ -117,28 +78,6 @@ util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {

@@ -158,73 +97,6 @@ util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %ar
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The layout is as the same as the the matmul LHS layout because it broadcasts
-// across the batch dimension. The test is preserved for having the same test
-// suite of non-layouts style encoding. I.e., this is the resolved layout
-// version of the below sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// test.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 // The M-dimension inner tile is not present because it broadcasts across the
 // M-dimension. We do not need to pack the M-dimension in this case.
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>

@@ -247,29 +119,6 @@ util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layo
 
 // -----
 
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #map = affine_map<(d0, d1, d2) -> (d0, d2)>
 #map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>

compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp (+3 -5)

@@ -89,11 +89,9 @@ Value calculateStorageElementCountInBytes(Location loc,
                                           RankedTensorType shapedType,
                                           ValueRange dynamicDims,
                                           OpBuilder &builder) {
-  Attribute encoding = shapedType.getEncoding();
-  if (auto encodingLayoutAttr =
-          dyn_cast_or_null<IREE::Encoding::SerializableEncodingAttrInterface>(
-              encoding)) {
-    return encodingLayoutAttr.calculateStorageSizeInBytes(
+  if (auto serializableEncodingAttr =
+          IREE::Encoding::getSerializableEncodingAttrInterface(shapedType)) {
+    return serializableEncodingAttr.calculateStorageSizeInBytes(
         loc, builder, shapedType, dynamicDims);
   }
 
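Storage-size computations like this one back queries such as stream.tensor.sizeof, as exercised by the tests above. A minimal usage sketch with a resolved layout (the function name is illustrative; the attributes are copied from the retained tests):

  #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
  #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
  util.func public @sizeof_example(%arg0: index, %arg1: index) -> index {
    %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
    util.return %0 : index
  }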
