[Encoding] Delete the allocation support of roundDimsTo field. #20332

Merged · 1 commit · Mar 20, 2025

97 changes: 16 additions & 81 deletions compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp
@@ -254,93 +254,28 @@ Value EncodingAttr::calculateStorageSizeInBytes(Location loc,
                                                 OpBuilder &builder,
                                                 RankedTensorType type,
                                                 ValueRange dynamicDims) const {
-  if (ArrayAttr layoutsAttr = getLayouts()) {
-    if (!llvm::all_of(layoutsAttr.getValue(),
-                      llvm::IsaPred<SerializableEncodingAttrInterface>)) {
-      return nullptr;
-    }
-
-    Value res;
-    for (auto attr :
-         layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
-      Value requestedSize =
-          attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
-      if (!res) {
-        res = requestedSize;
-        continue;
-      }
-      res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
-    }
-    return res;
-  }
-
-  // TODO(hanchung): Deprecate the below logic once EncodingSpecialization pass
-  // is enabled by default. The layouts should be resolved and `roundDimsTo`
-  // will be deprecated.
-  SmallVector<int64_t> paddedShape(type.getShape());
-  SmallVector<Value> paddedDynamicDims(dynamicDims.begin(), dynamicDims.end());
-  ArrayRef<int64_t> roundDimsTo = getRoundDimsToArray();
-  FailureOr<linalg::ContractionDimensions> cDims =
-      getEncodingContractionDims(*this);
-  auto pad = [&](int dim, int value) {
-    std::optional<unsigned> maybeMappedDim = mapDimToOperandIndex(dim);
-    if (!maybeMappedDim) {
-      return;
-    }
-    unsigned mappedDim = maybeMappedDim.value();
-    if (type.isDynamicDim(mappedDim)) {
-      mappedDim = type.getDynamicDimIndex(mappedDim);
-      auto alignment = builder.create<arith::ConstantIndexOp>(loc, value);
-      paddedDynamicDims[mappedDim] = builder.create<arith::CeilDivUIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-      paddedDynamicDims[mappedDim] = builder.create<arith::MulIOp>(
-          loc, paddedDynamicDims[mappedDim], alignment);
-    } else {
-      paddedShape[mappedDim] = llvm::alignTo(paddedShape[mappedDim], value);
-    }
-  };
-  for (auto m : cDims->m) {
-    pad(m, roundDimsTo[0]);
-  }
-  for (auto n : cDims->n) {
-    pad(n, roundDimsTo[1]);
-  }
-  for (auto k : cDims->k) {
-    pad(k, roundDimsTo[2]);
-  }
-
-  constexpr int64_t kNumBitsInByte = 8;
-  unsigned elementBits = getTypeBitWidth(type.getElementType());
-  int64_t numBytesPerElem = 1;
-  if (elementBits > kNumBitsInByte) {
-    numBytesPerElem *= getRoundedElementByteWidth(type.getElementType());
-  }
-
-  int64_t staticCount = numBytesPerElem;
-  for (unsigned i = 0, e = type.getRank(); i < e; ++i) {
-    if (!type.isDynamicDim(i)) {
-      staticCount *= paddedShape[i];
-    }
+  if (!isSerialized()) {
+    return nullptr;
   }
 
-  Value result =
-      builder.create<arith::ConstantIndexOp>(loc, staticCount).getResult();
-  for (auto dim : paddedDynamicDims) {
-    result = builder.create<arith::MulIOp>(loc, result, dim);
+  ArrayAttr layoutsAttr = getLayouts();
+  if (!llvm::all_of(layoutsAttr.getValue(),
+                    llvm::IsaPred<SerializableEncodingAttrInterface>)) {
+    return nullptr;
   }
 
-  // Always pack the elements back-to-back for subtypes.
-  if (elementBits < kNumBitsInByte) {
-    if (kNumBitsInByte % elementBits) {
-      assert(false && "unsupported subtype");
-      return Value();
+  Value res;
+  for (auto attr :
+       layoutsAttr.getAsRange<SerializableEncodingAttrInterface>()) {
+    Value requestedSize =
+        attr.calculateStorageSizeInBytes(loc, builder, type, dynamicDims);
+    if (!res) {
+      res = requestedSize;
+      continue;
    }
-    Value divisor = builder.create<arith::ConstantIndexOp>(
-        loc, kNumBitsInByte / elementBits);
-    result = builder.create<arith::CeilDivUIOp>(loc, result, divisor);
+    res = builder.create<arith::MaxUIOp>(loc, res, requestedSize);
  }
 
-  return result;
+  return res;
 }
 
 //===---------------------------------------------------------------------===//
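For context: after this change, `calculateStorageSizeInBytes` only produces a concrete size once the encoding has been serialized into resolved layouts, and the result is the maximum of the sizes requested by each layout, so a single allocation is large enough for whichever layout a target ultimately uses. A minimal caller sketch under that contract — `getEncodedStorageSize` and its dense fallback are illustrative helpers written for this note, not code from the PR:

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"

// Illustrative caller: prefer the serialized size; otherwise fall back to a
// dense byte count. Assumes IREE::Encoding::EncodingAttr is in scope, as in
// the file above.
static mlir::Value getEncodedStorageSize(mlir::Location loc,
                                         mlir::OpBuilder &builder,
                                         mlir::RankedTensorType type,
                                         mlir::ValueRange dynamicDims) {
  using namespace mlir;
  if (auto encoding = llvm::dyn_cast_or_null<IREE::Encoding::EncodingAttr>(
          type.getEncoding())) {
    // Returns nullptr unless isSerialized() holds and every layout
    // implements SerializableEncodingAttrInterface.
    if (Value size = encoding.calculateStorageSizeInBytes(loc, builder, type,
                                                          dynamicDims)) {
      return size;
    }
  }
  // Dense fallback (illustrative only): element byte width times each extent.
  int64_t elemBytes = llvm::divideCeil(type.getElementTypeBitWidth(), 8);
  Value size = builder.create<arith::ConstantIndexOp>(loc, elemBytes);
  unsigned dynIdx = 0;
  for (int64_t dim : type.getShape()) {
    Value extent;
    if (ShapedType::isDynamic(dim)) {
      extent = dynamicDims[dynIdx++];
    } else {
      extent = builder.create<arith::ConstantIndexOp>(loc, dim);
    }
    size = builder.create<arith::MulIOp>(loc, size, extent);
  }
  return size;
}
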
17 changes: 7 additions & 10 deletions compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td
@@ -86,19 +86,16 @@ def EncodingAttr :
     AttrParameter<"EncodingOpTypeAttr", "operand type">:$op_type,
     AttrParameter<"ArrayAttr", "element types of the user's operands">:$element_types,
     OptionalParameter<"ArrayAttr", "Indexing maps of the operation using this tensor">:$user_indexing_maps,
-    // TODO(hanchung): Deprecate the round_dims_to field when we plumb the layouts
-    // field through the whole stack. See https://github.com/iree-org/iree/issues/17924
-    // for details. Note that today we abuse the attribute to carry narrow
-    // matrix information. The end goal is deprecating the field and add a
-    // "iteration_space_size" field to describe the shape. It is useful to
+    // TODO(#19897): Switch round_dims_to to iteration_sizes.
+    // Note that today we abuse the attribute to carry narrow matrix
+    // information. The end goal is deprecating the field and add a
+    // "iteration_sizes" field to describe the shape. It is useful to
     // handle narrow matrix cases.
     OptionalParameter<"DenseArrayAttr", "Values for padding M,N,K dimensions">:$round_dims_to,
     OptionalParameter<"ArrayAttr", "An array of attributes that describes the "
-        "potential layouts on the device. It is an array because a device could "
-        "have several executable targets. Note that it can be any attribute that "
-        "implements EncodingLayoutResolverAttrInterface. The expectation of the field "
-        "is to bridge the logics between host codes and device codes. If an "
-        "attribute does not implement the interface, it could be discarded anytime.">:$layouts
+        "layouts of the encoding. It is an array because a device could have "
+        "multiple target devices. Note that it can be any attribute that "
+        "implements SerializableEncodingAttrInterface.">:$layouts
   );
 
   let builders = [
@@ -181,6 +181,7 @@ def IREEEncoding_SerializableEncodingAttrInterface :
         Returns the storage size (in bytes) for the tensor types with an
         optional encoding. Returns Value() if the size is unknown, i.e., it can
         not be inferred with existing information.
+        Returns nullptr on failure.
       }],
       /*retTy=*/"::mlir::Value",
       /*methodName=*/"calculateStorageSizeInBytes",
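To make that contract concrete, here is a sketch of how a layout attribute might implement this hook for a simple tiled layout: each dimension is rounded up to its inner tile size, and the padded extents are multiplied by the element byte width, matching the `ceildivui`/`muli` sequences checked in the tests below. This is a simplified illustration with invented names (one tile size per dimension, no dim permutation), not IREE's actual implementation:

#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"

// Sketch of a SerializableEncodingAttrInterface-style size computation for a
// tiled layout. Assumes tileSizes has one entry per tensor dimension.
static mlir::Value tiledStorageSizeInBytes(mlir::Location loc,
                                           mlir::OpBuilder &builder,
                                           mlir::RankedTensorType type,
                                           mlir::ValueRange dynamicDims,
                                           llvm::ArrayRef<int64_t> tileSizes) {
  using namespace mlir;
  int64_t elemBytes = llvm::divideCeil(type.getElementTypeBitWidth(), 8);
  Value size = builder.create<arith::ConstantIndexOp>(loc, elemBytes);
  unsigned dynIdx = 0;
  for (auto [dim, tile] : llvm::zip_equal(type.getShape(), tileSizes)) {
    Value extent;
    if (ShapedType::isDynamic(dim)) {
      // padded = ceildiv(extent, tile) * tile, in unsigned arithmetic.
      Value t = builder.create<arith::ConstantIndexOp>(loc, tile);
      extent = dynamicDims[dynIdx++];
      extent = builder.create<arith::CeilDivUIOp>(loc, extent, t);
      extent = builder.create<arith::MulIOp>(loc, extent, t);
    } else {
      // Static dims can be padded at compile time.
      extent =
          builder.create<arith::ConstantIndexOp>(loc, llvm::alignTo(dim, tile));
    }
    size = builder.create<arith::MulIOp>(loc, size, extent);
  }
  return size;
}

A real layout (e.g. the #iree_cpu.vmvx_encoding_layout used in the tests) additionally has to account for innerDimsPos, outerDimsPerm, and missing tiles on broadcasted dimensions.
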
@@ -39,27 +39,6 @@ util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {
@@ -76,24 +55,6 @@ util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: ind
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-  %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
-// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
-// CHECK: return %[[T0]]
-
-// -----
-
 // In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
 // (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
 // N-dimension and dim_0 is K-dimension.
@@ -117,28 +78,6 @@ util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_rhs_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
 #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
 util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
@@ -158,73 +97,6 @@ util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %ar
 
 // -----
 
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-#encoding = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_result_encoding_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-// The layout is as the same as the the matmul LHS layout because it broadcasts
-// across the batch dimension. The test is preserved for having the same test
-// suite of non-layouts style encoding. I.e., this is the resolved layout
-// version of the below sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// test.
-#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
-// CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-// CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
 // The M-dimension inner tile is not present because it broadcasts across the
 // M-dimension. We do not need to pack the M-dimension in this case.
 #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>
@@ -247,29 +119,6 @@ util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layo
 
 // -----
 
-#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-#map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
-util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-  %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-  util.return %0 : index
-}
-// CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
-// CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
-//
-// Multiplied by 4 because f32 has 4 bytes.
-//
-// CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
-// CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
-// CHECK: return %[[T1]]
-
-// -----
-
 #map = affine_map<(d0, d1, d2) -> (d0, d2)>
 #map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
8 changes: 3 additions & 5 deletions compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp
@@ -89,11 +89,9 @@ Value calculateStorageElementCountInBytes(Location loc,
                                           RankedTensorType shapedType,
                                           ValueRange dynamicDims,
                                           OpBuilder &builder) {
-  Attribute encoding = shapedType.getEncoding();
-  if (auto encodingLayoutAttr =
-          dyn_cast_or_null<IREE::Encoding::SerializableEncodingAttrInterface>(
-              encoding)) {
-    return encodingLayoutAttr.calculateStorageSizeInBytes(
+  if (auto serializableEncodingAttr =
+          IREE::Encoding::getSerializableEncodingAttrInterface(shapedType)) {
+    return serializableEncodingAttr.calculateStorageSizeInBytes(
         loc, builder, shapedType, dynamicDims);
   }
 
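The new helper centralizes the encoding lookup that was previously open-coded here. Its definition is not part of this diff; a plausible minimal shape, assuming it only wraps the cast (the real helper in the Encoding dialect may do more):

// Sketch only: resolve a tensor type's encoding to the serializable
// interface, or return a null interface when the tensor has no encoding or
// the encoding does not implement it.
static IREE::Encoding::SerializableEncodingAttrInterface
getSerializableEncodingAttrInterface(mlir::RankedTensorType type) {
  return llvm::dyn_cast_or_null<
      IREE::Encoding::SerializableEncodingAttrInterface>(type.getEncoding());
}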