@@ -39,27 +39,6 @@ util.func public @sizeof_lhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
// -----

- #map = affine_map<(d0, d1, d2) -> (d0, d2)>
- #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
- #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
- #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_lhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_lhs_encoding_dynamic
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
- // CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
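// A sketch of the size computation this test verifies, assuming the same
// 10x? f32 shape as the removed sizeof_lhs_encoding_partially_dynamic test
// below, with inner tiles [4, 16]:
//   pad_d0 = ceil(10 / 4) * 4 = 12          // static dim constant-folds
//   size   = (12 * 4 bytes) * pad(d1, 16)   // = 48 * (ceildivui(d1, 16) * 16)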
#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: index) -> index {
@@ -76,24 +55,6 @@ util.func public @sizeof_lhs_encoding_partially_dynamic_using_layouts(%arg0: ind
// -----

- #map = affine_map<(d0, d1, d2) -> (d0, d2)>
- #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
- #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
- #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_lhs_encoding_partially_dynamic(%arg0: index) -> index {
-   %0 = stream.tensor.sizeof tensor<10x?xf32, #encoding>{%arg0} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_lhs_encoding_partially_dynamic
- // CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg0, %[[C16]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D1]], %[[C48]]
- // CHECK: return %[[T0]]
-
- // -----
-
// In GEMM, the RHS has the `(M, N, K) -> (K, N)` layout. The tile sizes
// (i.e., [8, 16]) are for [dim_1, dim_0] in the encoding_info, where dim_1 is
// the N-dimension and dim_0 is the K-dimension.
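// A sketch of the resulting computation, assuming a dynamic KxN f32 RHS
// (4 bytes per element):
//   pad_N = ceildivui(N, 8)  * 8
//   pad_K = ceildivui(K, 16) * 16
//   size  = (pad_K * 4) * pad_N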
@@ -117,28 +78,6 @@ util.func public @sizeof_rhs_encoding_dynamic_using_layouts(%arg0: index, %arg1:
// -----

- #map = affine_map<(d0, d1, d2) -> (d0, d2)>
- #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
- #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
- #encoding = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_rhs_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_rhs_encoding_dynamic
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
- // CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C16]]
- // CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C16]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 8], outerDimsPerm = [0, 1]}}>
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
@@ -158,73 +97,6 @@ util.func public @sizeof_result_encoding_dynamic_using_layouts(%arg0: index, %ar
// -----

- #map = affine_map<(d0, d1, d2) -> (d0, d2)>
- #map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
- #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
- #encoding = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_result_encoding_dynamic(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_result_encoding_dynamic
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
- // CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
- // CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C8]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C8]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
- // The layout is the same as the matmul LHS layout because the operand
- // broadcasts across the batch dimension. The test is preserved to keep
- // parity with the non-layouts style encoding test suite, i.e., this is the
- // resolved-layout version of the
- // sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic test below.
- #encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [0, 1], innerTileSizes = [4, 16], outerDimsPerm = [0, 1]}}>
- #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], layouts = [#encoding_layout]>
- util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic_using_layouts
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivsi %arg0, %[[C4]]
- // CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivsi %arg1, %[[C16]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
- #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
- #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
- #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
- #map3 = affine_map<(d0, d1, d2) -> (d1, d2)>
- #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_batch_dim_dynamic
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D0:.+]] = arith.ceildivui %arg0, %[[C4]]
- // CHECK: %[[PAD_D0:.+]] = arith.muli %[[CEIL_DIV_D0]], %[[C4]]
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
- // CHECK: %[[T0:.+]] = arith.muli %[[PAD_D0]], %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
// The M-dimension inner tile is not present because it broadcasts across the
// M-dimension. We do not need to pack the M-dimension in this case.
#encoding_layout = #iree_cpu.vmvx_encoding_layout<configuration = {encoding_info = {innerDimsPos = [1], innerTileSizes = [16], outerDimsPerm = [0, 1]}}>
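// A sketch of the resulting computation for a dynamic d0xd1 f32 tensor,
// assuming only d1 is tiled (by 16) while d0 stays unpadded:
//   pad_d1 = ceildivui(d1, 16) * 16
//   size   = (d0 * 4) * pad_d1   // 4 bytes per f32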
@@ -247,29 +119,6 @@ util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic_using_layo
// -----

- #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
- #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
- #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
- #map3 = affine_map<(d0, d1, d2) -> (d0, d2)>
- #encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[#map, #map3], #map1, #map2], round_dims_to = array<i64: 4, 8, 16>>
- util.func public @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic(%arg0: index, %arg1: index) -> index {
-   %0 = stream.tensor.sizeof tensor<?x?xf32, #encoding>{%arg0, %arg1} : index
-   util.return %0 : index
- }
- // CHECK-LABEL: @sizeof_lhs_encoding_with_bcast_across_m_dim_dynamic
- // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
- // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
- // CHECK: %[[CEIL_DIV_D1:.+]] = arith.ceildivui %arg1, %[[C16]]
- // CHECK: %[[PAD_D1:.+]] = arith.muli %[[CEIL_DIV_D1]], %[[C16]]
- //
- // Multiplied by 4 because f32 has 4 bytes.
- //
- // CHECK: %[[T0:.+]] = arith.muli %arg0, %[[C4]]
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[PAD_D1]]
- // CHECK: return %[[T1]]
-
- // -----
-
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>