
Commit 09fb0e6

Remove the >3D restriction on reshaping for dot_general (#13521)
Previously, collapsing dimensions together (when there are multiple dims of the same type) happened only if the input rank was larger than 3. This made the incorrect assumption that a rank-3 tensor is already in a standard form (BxCxP or BxPxC), when in reality it could have multiple dims of one type and none of another (BxBxC, for example). In those cases we still need to perform a reshape.
1 parent e2aa9f2 commit 09fb0e6
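
To make the rank-3 corner case concrete, here is a minimal standalone sketch of the new "already standard form" check, assuming the same group borders the pass computes (the first group ends at dimsBorder0, the middle group at dimsBorder1); the helper name isStandardForm is hypothetical and not part of the commit:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // A shape is "standard" when each of the three groups delimited by
    // the borders has at most one dimension, so no collapse is needed.
    static bool isStandardForm(const std::vector<int64_t> &shape,
                               size_t dimsBorder0, size_t dimsBorder1) {
      return dimsBorder0 <= 1 && dimsBorder1 - dimsBorder0 <= 1 &&
             shape.size() - dimsBorder1 <= 1;
    }

    int main() {
      // One dim per group (e.g. BxPxC) -> no reshape needed.
      assert(isStandardForm({4, 8, 16}, /*dimsBorder0=*/1, /*dimsBorder1=*/2));
      // BxBxC: two batching dims, empty middle group, one contracting dim.
      // Rank is only 3, so the old `rank <= 3` check skipped the reshape,
      // but the shape is not standard and still needs one.
      assert(!isStandardForm({4, 4, 8}, /*dimsBorder0=*/2, /*dimsBorder1=*/2));
      return 0;
    }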

2 files changed: +27 -7 lines changed

compiler/src/iree/compiler/InputConversion/MHLO/MHLOToMHLOPreprocessing.cpp (+9 -7)
@@ -292,11 +292,13 @@ class TransposeReshapeGenericDotGeneral
         b.getI64TensorAttr(targetOrder));
   }
 
-  Value ReshapeIfMorethan3D(OpBuilder &b, Location loc, Value src,
-                            size_t dimsBorder0, size_t dimsBorder1) const {
+  Value ReshapeIfNonStandard(OpBuilder &b, Location loc, Value src,
+                             size_t dimsBorder0, size_t dimsBorder1) const {
     auto type = src.getType().cast<RankedTensorType>();
-    if (type.getRank() <= 3) return src;
     auto shape = type.getShape();
+    if (dimsBorder0 <= 1 && dimsBorder1 - dimsBorder0 <= 1 &&
+        shape.size() - dimsBorder1 <= 1)
+      return src;
     SmallVector<int64_t, 4> result_shape = {
         std::accumulate(shape.begin(), shape.begin() + dimsBorder0, 1,
                         std::multiplies<int64_t>()),
@@ -387,10 +389,10 @@ class TransposeReshapeGenericDotGeneral
     int64_t numRhsContractionDims =
         rhsContractionBase + rhsContractingDims.size();
 
-    lhs = ReshapeIfMorethan3D(rewriter, op.getLoc(), lhs,
-                              lhsBatchingDims.size(), lhsContractionBase);
-    rhs = ReshapeIfMorethan3D(rewriter, op.getLoc(), rhs,
-                              rhsBatchingDims.size(), numRhsContractionDims);
+    lhs = ReshapeIfNonStandard(rewriter, op.getLoc(), lhs,
+                               lhsBatchingDims.size(), lhsContractionBase);
+    rhs = ReshapeIfNonStandard(rewriter, op.getLoc(), rhs,
+                               rhsBatchingDims.size(), numRhsContractionDims);
 
     if (lhs == op.getLhs() && rhs == op.getRhs())
       return rewriter.notifyMatchFailure(op, "already in canonical form");
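
For intuition on what the collapse produces once the check fails, here is a minimal standalone sketch of the same three-group product reduction (an approximation of the result_shape computation above, not the pass itself; the helper name collapse is hypothetical), traced on the shapes from the new test below:

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Collapse a shape into three dims: the products of the dims in
    // [0, b0), [b0, b1), and [b1, end) -- mirroring result_shape above.
    static std::vector<int64_t> collapse(const std::vector<int64_t> &shape,
                                         size_t b0, size_t b1) {
      auto prod = [&](size_t lo, size_t hi) {
        return std::accumulate(shape.begin() + lo, shape.begin() + hi,
                               int64_t{1}, std::multiplies<int64_t>());
      };
      return {prod(0, b0), prod(b0, b1), prod(b1, shape.size())};
    }

    int main() {
      // LHS of the new test: tensor<1x2x3xf32> with batching dims [0, 1]
      // and contracting dim [2] -> borders (2, 2) -> collapsed to 2x1x3.
      for (int64_t d : collapse({1, 2, 3}, 2, 2)) std::cout << d << ' ';
      std::cout << '\n';  // prints: 2 1 3
      // Transposed RHS: tensor<1x2x3x4xf32> -> borders (2, 3) -> 2x3x4.
      for (int64_t d : collapse({1, 2, 3, 4}, 2, 3)) std::cout << d << ' ';
      std::cout << '\n';  // prints: 2 3 4
      return 0;
    }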

compiler/src/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing_canonicalize_dot_general.mlir (+18 -0)
@@ -15,3 +15,21 @@ func.func public @dot_general_2d(%arg0: tensor<4x3xf32> {mhlo.sharding = ""}, %a
 // CHECK-SAME: precision_config = [#mhlo<precision HIGHEST>, #mhlo<precision HIGHEST>]
   return %0 : tensor<3xf32>
 }
+
+// CHECK-LABEL: @dot_general_4d
+func.func public @dot_general_4d(%arg0: tensor<1x2x3xf32> {mhlo.sharding = ""}, %arg1: tensor<1x4x2x3xf32> {mhlo.sharding = ""}) -> tensor<1x2x4xf32> {
+  %0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = #mhlo.dot<lhs_batching_dimensions = [0, 1], rhs_batching_dimensions = [0, 2], lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [3]>, precision_config = [#mhlo<precision HIGHEST>, #mhlo<precision HIGHEST>]} : (tensor<1x2x3xf32>, tensor<1x4x2x3xf32>) -> tensor<1x2x4xf32>
+
+// CHECK: %[[RHS_T:.+]] = "mhlo.transpose"(%arg1) {permutation = dense<[0, 2, 3, 1]> : tensor<4xi64>} : (tensor<1x4x2x3xf32>) -> tensor<1x2x3x4xf32>
+// CHECK: %[[LHS_R:.+]] = mhlo.reshape %arg0 : (tensor<1x2x3xf32>) -> tensor<2x1x3xf32>
+// CHECK: %[[RHS_R:.+]] = mhlo.reshape %[[RHS_T]] : (tensor<1x2x3x4xf32>) -> tensor<2x3x4xf32>
+// CHECK: %[[DOT:.+]] = "mhlo.dot_general"(%[[LHS_R]], %[[RHS_R]])
+// CHECK-SAME: dot_dimension_numbers = #mhlo.dot<
+// CHECK-SAME:   lhs_batching_dimensions = [0]
+// CHECK-SAME:   rhs_batching_dimensions = [0]
+// CHECK-SAME:   lhs_contracting_dimensions = [2]
+// CHECK-SAME:   rhs_contracting_dimensions = [1]>
+// CHECK-SAME: precision_config = [#mhlo<precision HIGHEST>, #mhlo<precision HIGHEST>]
+// CHECK: mhlo.reshape %[[DOT]] : (tensor<2x1x4xf32>) -> tensor<1x2x4xf32>
+  return %0 : tensor<1x2x4xf32>
+}
