@@ -314,8 +314,8 @@ typedef struct __tile1024i_str {
314
314
/// \param stride
315
315
/// The stride between the rows' data to be loaded in memory.
316
316
__DEFAULT_FN_ATTRS_TILE
317
- static void __tile_loadd (__tile1024i * dst , const void * base ,
318
- __SIZE_TYPE__ stride ) {
317
+ static __inline__ void __tile_loadd (__tile1024i * dst , const void * base ,
318
+ __SIZE_TYPE__ stride ) {
319
319
dst -> tile = _tile_loadd_internal (dst -> row , dst -> col , base , stride );
320
320
}
321
321
@@ -335,8 +335,8 @@ static void __tile_loadd(__tile1024i *dst, const void *base,
335
335
/// \param stride
336
336
/// The stride between the rows' data to be loaded in memory.
337
337
__DEFAULT_FN_ATTRS_TILE
338
- static void __tile_stream_loadd (__tile1024i * dst , const void * base ,
339
- __SIZE_TYPE__ stride ) {
338
+ static __inline__ void __tile_stream_loadd (__tile1024i * dst , const void * base ,
339
+ __SIZE_TYPE__ stride ) {
340
340
dst -> tile = _tile_loaddt1_internal (dst -> row , dst -> col , base , stride );
341
341
}
342
342
@@ -357,8 +357,8 @@ static void __tile_stream_loadd(__tile1024i *dst, const void *base,
357
357
/// \param src1
358
358
/// The 2nd source tile. Max size is 1024 Bytes.
359
359
__DEFAULT_FN_ATTRS_INT8
360
- static void __tile_dpbssd (__tile1024i * dst , __tile1024i src0 ,
361
- __tile1024i src1 ) {
360
+ static __inline__ void __tile_dpbssd (__tile1024i * dst , __tile1024i src0 ,
361
+ __tile1024i src1 ) {
362
362
dst -> tile = _tile_dpbssd_internal (src0 .row , src1 .col , src0 .col , dst -> tile ,
363
363
src0 .tile , src1 .tile );
364
364
}
@@ -380,8 +380,8 @@ static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
380
380
/// \param src1
381
381
/// The 2nd source tile. Max size is 1024 Bytes.
382
382
__DEFAULT_FN_ATTRS_INT8
383
- static void __tile_dpbsud (__tile1024i * dst , __tile1024i src0 ,
384
- __tile1024i src1 ) {
383
+ static __inline__ void __tile_dpbsud (__tile1024i * dst , __tile1024i src0 ,
384
+ __tile1024i src1 ) {
385
385
dst -> tile = _tile_dpbsud_internal (src0 .row , src1 .col , src0 .col , dst -> tile ,
386
386
src0 .tile , src1 .tile );
387
387
}
@@ -403,8 +403,8 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
403
403
/// \param src1
404
404
/// The 2nd source tile. Max size is 1024 Bytes.
405
405
__DEFAULT_FN_ATTRS_INT8
406
- static void __tile_dpbusd (__tile1024i * dst , __tile1024i src0 ,
407
- __tile1024i src1 ) {
406
+ static __inline__ void __tile_dpbusd (__tile1024i * dst , __tile1024i src0 ,
407
+ __tile1024i src1 ) {
408
408
dst -> tile = _tile_dpbusd_internal (src0 .row , src1 .col , src0 .col , dst -> tile ,
409
409
src0 .tile , src1 .tile );
410
410
}
@@ -426,8 +426,8 @@ static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
426
426
/// \param src1
427
427
/// The 2nd source tile. Max size is 1024 Bytes.
428
428
__DEFAULT_FN_ATTRS_INT8
429
- static void __tile_dpbuud (__tile1024i * dst , __tile1024i src0 ,
430
- __tile1024i src1 ) {
429
+ static __inline__ void __tile_dpbuud (__tile1024i * dst , __tile1024i src0 ,
430
+ __tile1024i src1 ) {
431
431
dst -> tile = _tile_dpbuud_internal (src0 .row , src1 .col , src0 .col , dst -> tile ,
432
432
src0 .tile , src1 .tile );
433
433
}
@@ -446,7 +446,8 @@ static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
446
446
/// \param stride
447
447
/// The stride between the rows' data to be stored in memory.
448
448
__DEFAULT_FN_ATTRS_TILE
449
- static void __tile_stored (void * base , __SIZE_TYPE__ stride , __tile1024i src ) {
449
+ static __inline__ void __tile_stored (void * base , __SIZE_TYPE__ stride ,
450
+ __tile1024i src ) {
450
451
_tile_stored_internal (src .row , src .col , base , stride , src .tile );
451
452
}
452
453
@@ -459,7 +460,7 @@ static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
459
460
/// \param dst
460
461
/// The destination tile to be zeroed. Max size is 1024 Bytes.
461
462
__DEFAULT_FN_ATTRS_TILE
462
- static void __tile_zero (__tile1024i * dst ) {
463
+ static __inline__ void __tile_zero (__tile1024i * dst ) {
463
464
dst -> tile = __builtin_ia32_tilezero_internal (dst -> row , dst -> col );
464
465
}
465
466
@@ -479,8 +480,8 @@ static void __tile_zero(__tile1024i *dst) {
479
480
/// \param src1
480
481
/// The 2nd source tile. Max size is 1024 Bytes.
481
482
__DEFAULT_FN_ATTRS_BF16
482
- static void __tile_dpbf16ps (__tile1024i * dst , __tile1024i src0 ,
483
- __tile1024i src1 ) {
483
+ static __inline__ void __tile_dpbf16ps (__tile1024i * dst , __tile1024i src0 ,
484
+ __tile1024i src1 ) {
484
485
dst -> tile = _tile_dpbf16ps_internal (src0 .row , src1 .col , src0 .col , dst -> tile ,
485
486
src0 .tile , src1 .tile );
486
487
}
0 commit comments