@@ -230,8 +230,8 @@ func.func @set_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() {
// CHECK-SAME: : tensor<2x5x128x128xf32> into tensor<2x5x8x4x4x4x2x16xf32>
// CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x8x4x4x4x2x16xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x2x4x16x4xf32>)
- // CHECK-SAME: permutation = [0, 1, 2, 5, 6, 3, 7, 4]
+ // CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x2x4x16x4xf32>)
+ // CHECK-SAME: permutation = [0, 1, 5, 2, 6, 3, 7, 4]
// CHECK: flow.dispatch.tensor.store %[[TRANSPOSE]]

// -----
@@ -255,9 +255,9 @@ func.func @unset_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() {

// CHECK-LABEL: func.func @unset_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() {
// CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
- // CHECK-SAME: ins(%{{.+}} : tensor<2x5x8x4x2x4x16x4xf32>)
+ // CHECK-SAME: ins(%{{.+}} : tensor<2x5x4x8x2x4x16x4xf32>)
// CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x4x4x2x16xf32>)
- // CHECK-SAME: permutation = [0, 1, 2, 5, 7, 3, 4, 6]
+ // CHECK-SAME: permutation = [0, 1, 3, 5, 7, 2, 4, 6]
// CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]]
// CHECK-SAME: : tensor<2x5x8x4x4x4x2x16xf32> into tensor<2x5x128x128xf32>
// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]]
@@ -298,9 +298,9 @@ func.func @unset_encoding_ACC_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32() {
}

// CHECK-LABEL: func.func @unset_encoding_ACC_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32
// CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
- // CHECK-SAME: ins(%{{.+}} : tensor<?x?x8x4x2x4x16x4xf32>)
+ // CHECK-SAME: ins(%{{.+}} : tensor<?x?x4x8x2x4x16x4xf32>)
// CHECK-SAME: outs({{.*}} : tensor<?x?x8x4x4x4x2x16xf32>)
- // CHECK-SAME: permutation = [0, 1, 2, 5, 7, 3, 4, 6]
+ // CHECK-SAME: permutation = [0, 1, 3, 5, 7, 2, 4, 6]
// CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]]
// CHECK-SAME: : tensor<?x?x8x4x4x4x2x16xf32> into tensor<?x?x128x128xf32>
// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]]
@@ -362,7 +362,7 @@ func.func @matmul_lowering_MFMA_F32_16x16x4_F32() {
// CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2)
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor<?x?x8x4x16x4xf32>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor<?x?x4x2x4x16x4xf32>
- // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x8x4x2x4x16x4xf32>
+ // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x4x8x2x4x16x4xf32>
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]]
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],
// CHECK-SAME: iterator_types = [#iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<reduction>]
@@ -422,7 +422,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x4_F32() {
// CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2)
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor<?x?x?x8x4x16x4xf32>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor<?x?x?x4x2x4x16x4xf32>
- // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x8x4x2x4x16x4xf32>
+ // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x4x8x2x4x16x4xf32>
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]]
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],
// CHECK-SAME: iterator_types = [#iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<reduction>]
@@ -528,8 +528,8 @@ func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() {
// CHECK-SAME: : tensor<2x5x128x128xi32> into tensor<2x5x8x4x4x4x2x16xi32>
// CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x8x4x4x4x2x16xi32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x2x4x16x4xi32>)
- // CHECK-SAME: permutation = [0, 1, 2, 5, 6, 3, 7, 4]
+ // CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x2x4x16x4xi32>)
+ // CHECK-SAME: permutation = [0, 1, 5, 2, 6, 3, 7, 4]
// CHECK: flow.dispatch.tensor.store %[[TRANSPOSE]]

// -----
@@ -553,9 +553,9 @@ func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() {

// CHECK-LABEL: func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() {
// CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
- // CHECK-SAME: ins(%{{.+}} : tensor<2x5x8x4x2x4x16x4xi32>)
+ // CHECK-SAME: ins(%{{.+}} : tensor<2x5x4x8x2x4x16x4xi32>)
// CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x4x4x2x16xi32>)
- // CHECK-SAME: permutation = [0, 1, 2, 5, 7, 3, 4, 6]
+ // CHECK-SAME: permutation = [0, 1, 3, 5, 7, 2, 4, 6]
// CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]]
// CHECK-SAME: : tensor<2x5x8x4x4x4x2x16xi32> into tensor<2x5x128x128xi32>
// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]]
@@ -618,7 +618,7 @@ func.func @matmul_lowering_MFMA_I32_16x16x32_I8() {
// CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2)
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor<?x?x8x4x16x2x8xi8>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor<?x?x4x2x4x16x2x8xi8>
- // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x8x4x2x4x16x4xi32>
+ // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x4x8x2x4x16x4xi32>
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]]
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],
// CHECK-SAME: iterator_types = [#iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<reduction>]
@@ -1124,7 +1124,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x32_F8E4M3FNUZ() {
// CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2)
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor<?x?x?x8x4x16x2x8xf8E4M3FNUZ>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor<?x?x?x4x2x4x16x2x8xf8E4M3FNUZ>
- // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x8x4x2x4x16x4xf32>
+ // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x4x8x2x4x16x4xf32>
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]]
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],
// CHECK-SAME: iterator_types = [#iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<reduction>]
@@ -1184,7 +1184,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x16_BF16() {
// CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2)
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor<?x?x?x8x4x16x2x4xbf16>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor<?x?x?x4x2x4x16x2x4xbf16>
- // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x8x4x2x4x16x4xf32>
+ // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor<?x?x?x4x8x2x4x16x4xf32>
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]]
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],
// CHECK-SAME: iterator_types = [#iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<parallel>, #iree_gpu.iterator_type<reduction>]
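
For reference, the two ACC permutations introduced by this change are mutual inverses, so the unset_encoding transpose exactly undoes the set_encoding one. A minimal numpy sketch (not part of the test file; shapes and permutations taken from the CHECK lines above) verifying the round trip:

```python
import numpy as np

# New permutations from this diff. numpy's transpose uses the same convention
# as linalg.transpose: output axis j takes input axis permutation[j].
set_perm = [0, 1, 5, 2, 6, 3, 7, 4]    # set_encoding ACC transpose
unset_perm = [0, 1, 3, 5, 7, 2, 4, 6]  # unset_encoding ACC transpose

x = np.arange(2 * 5 * 8 * 4 * 4 * 4 * 2 * 16).reshape(2, 5, 8, 4, 4, 4, 2, 16)

tiled = x.transpose(set_perm)
assert tiled.shape == (2, 5, 4, 8, 2, 4, 16, 4)        # matches the new ACC type
assert np.array_equal(tiled.transpose(unset_perm), x)  # round trip is identity
```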