-
Notifications
You must be signed in to change notification settings - Fork 0
conv_2d_nhwc_hwcf_cbsm_template
joejiong edited this page Dec 21, 2021
·
2 revisions
// Identity map: passes its single operand through unchanged. Used below to
// express plain `0 .. N` loop bounds.
#map0 = affine_map<(d0) -> (d0)>
// Tile-count map: ceil(d0 / 256). Used below as the upper bound of the loop
// that walks the output width in chunks of 256 elements.
#map1 = affine_map<(d0) -> (d0 ceildiv 256)>
module {
  // Direct 2-D convolution template, vectorized in chunks of 256 f32 values.
  //
  //   %arg0 : input image,  indexed as [n, h, w, c]    (NHWC)
  //   %arg1 : filter,       indexed as [fh, fw, c, f]  (HWCF)
  //   %arg2 : output,       indexed as [n, h, w, f]    (NHWF); accumulated
  //           in place, so the caller must initialise it (e.g. to zero).
  //
  // For every (on, of, kc, oh, fh, fw) and every 256-wide tile of the output
  // width the body computes
  //   out[on, oh, tile, of] += img[on, oh + fh, fw + tile, kc] * filter[fh, fw, kc, of]
  // (unit stride, no padding, no dilation).
  //
  // NOTE(review): the width loop runs ceil(OW / 256) times, so when OW is not
  // a multiple of 256 the last tile's vector load/store extends past OW --
  // confirm the caller pads the buffers or add masking.
  // NOTE(review): a 1-D affine.vector_load/store slice is taken along the
  // innermost memref dimension (C for the image, F for the output), while the
  // start index here advances along W -- confirm this matches the intended
  // data layout.
  func @pointwise_conv_2d_nhwc_hwcf(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %c2 = arith.constant 2 : index
    %c3 = arith.constant 3 : index

    // Filter extents (HWCF): height, width, input channels.
    %KH = memref.dim %arg1, %c0 : memref<?x?x?x?xf32> // FH
    %KW = memref.dim %arg1, %c1 : memref<?x?x?x?xf32> // FW
    %KC = memref.dim %arg1, %c2 : memref<?x?x?x?xf32> // FC

    // Output extents: batch = dim 0, height = dim 1, width = dim 2,
    // features = dim 3.
    %ON = memref.dim %arg2, %c0 : memref<?x?x?x?xf32> // ON
    %OH = memref.dim %arg2, %c1 : memref<?x?x?x?xf32> // OH
    %OW = memref.dim %arg2, %c2 : memref<?x?x?x?xf32> // OW
    %OF = memref.dim %arg2, %c3 : memref<?x?x?x?xf32> // OF

    affine.for %on = #map0(%c0) to #map0(%ON) {                   // on : 0 .. ON (batch)
      affine.for %of = #map0(%c0) to #map0(%OF) {                 // of : 0 .. OF (output features)
        affine.for %kc = #map0(%c0) to #map0(%KC) {               // kc : 0 .. KC (input channels)
          affine.for %oh = #map0(%c0) to #map0(%OH) {             // oh : 0 .. OH (output rows)
            affine.for %fh = #map0(%c0) to #map0(%KH) {           // fh : 0 .. KH (filter rows)
              affine.for %fw = #map0(%c0) to #map0(%KW) {         // fw : 0 .. KW (filter cols)
                affine.for %ow_256 = #map0(%c0) to #map1(%OW) {   // ow_256 : 0 .. ceil(OW / 256) (width tiles)
                  // Load the single filter tap filter[fh, fw, kc, of]
                  // (for a given kc there is exactly one value per `of`).
                  %4 = affine.vector_load %arg1[%fh, %fw, %kc, %of] : memref<?x?x?x?xf32>, vector<1xf32>
                  // Splat the tap across all 256 lanes.
                  %5 = vector.broadcast %4 : vector<1xf32> to vector<256xf32>
                  // Load 256 input values starting at img[on, oh + fh, fw + ow_256 * 256, kc].
                  %6 = affine.vector_load %arg0[%on, %oh + %fh, %fw + %ow_256 * 256, %kc] : memref<?x?x?x?xf32>, vector<256xf32>
                  // Load the current partial sums starting at out[on, oh, ow_256 * 256, of].
                  %7 = affine.vector_load %arg2[%on, %oh, %ow_256 * 256, %of] : memref<?x?x?x?xf32>, vector<256xf32>
                  // acc = img * filter + out
                  %8 = vector.fma %6, %5, %7 : vector<256xf32>
                  // Write the updated partial sums back to the same output tile.
                  affine.vector_store %8, %arg2[%on, %oh, %ow_256 * 256, %of] : memref<?x?x?x?xf32>, vector<256xf32>
                }
              }
            }
          }
        }
      }
    }
    return
  }
}