#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")

namespace mlir::iree_compiler {

-llvm::cl::opt<bool> clGPUTestTileAndFuseMatmul(
-    "iree-codegen-llvmgpu-test-tile-and-fuse-matmul",
+// TODO: Formalize flag under LLVMGPU opt levels.
+llvm::cl::opt<bool> clGPUUseTileAndFuseMatmul(
+    "iree-codegen-llvmgpu-use-tile-and-fuse-matmul",
    llvm::cl::desc("test the tile and fuse pipeline for matmul"),
-    llvm::cl::init(false));
-
-llvm::cl::opt<bool> clGPUTestTileAndFuseVectorize(
-    "iree-codegen-llvmgpu-test-tile-and-fuse-vectorize",
-    llvm::cl::desc(
-        "test the tile and fuse pipeline for all supported operations"),
-    llvm::cl::init(false));
+    llvm::cl::init(true));

+// TODO: Formalize flag under LLVMGPU opt levels.
llvm::cl::opt<bool> clLLVMGPUVectorizePipeline(
    "iree-codegen-llvmgpu-vectorize-pipeline",
    llvm::cl::desc("forces use of the legacy LLVMGPU vectorize pipeline"),
    llvm::cl::init(false));

+// TODO: Formalize flag under LLVMGPU opt levels.
llvm::cl::opt<bool> clGPUEnableVectorDistribution(
    "iree-codegen-llvmgpu-use-vector-distribution",
    llvm::cl::desc("enable the usage of the vector distribution pipeline"),
@@ -80,24 +77,28 @@ llvm::cl::opt<bool> clGPUUnalignedGEMMVectorDistribution(
                   "unaligned GEMMs when supported"),
    llvm::cl::init(false));

+// TODO: Formalize flag under LLVMGPU opt levels.
llvm::cl::opt<bool> clGPUUseTileAndFuseConvolution(
    "iree-codegen-llvmgpu-use-tile-and-fuse-convolution",
    llvm::cl::desc(
        "enable the tile and fuse pipeline for supported convolutions"),
    llvm::cl::init(true));

+// TODO: Formalize flag under LLVMGPU opt levels.
/// Flag to force using WMMA tensorcore operations.
llvm::cl::opt<bool>
    clGPUUseWMMA("iree-codegen-llvmgpu-use-wmma",
                 llvm::cl::desc("force use of wmma operations for tensorcore"),
                 llvm::cl::init(false));

+// TODO: Formalize flag under LLVMGPU opt levels.
/// Flag used to toggle using mma.sync vs wmma when targetting tensorcore.
llvm::cl::opt<bool>
    clGPUUseMMASync("iree-codegen-llvmgpu-use-mma-sync",
                    llvm::cl::desc("force use mma sync instead of wmma ops"),
                    llvm::cl::init(false));

+// TODO: Move to a testing only flag.
llvm::cl::opt<int> clGPUMatmulCThreshold(
    "iree-codegen-llvmgpu-matmul-c-matrix-threshold",
    llvm::cl::desc("matmul c matrix element count threshold to be considered "
@@ -114,6 +115,13 @@ static llvm::cl::opt<bool>
    clLLVMGPUUseIgemm("iree-codegen-llvmgpu-use-igemm",
                      llvm::cl::desc("Enable implicit gemm for convolutions."),
                      llvm::cl::init(true));
+
+// Hidden testing only flag
+llvm::cl::opt<bool> clGPUTestTileAndFuseVectorize(
+    "iree-codegen-llvmgpu-test-tile-and-fuse-vectorize",
+    llvm::cl::desc(
+        "test the tile and fuse pipeline for all supported operations"),
+    llvm::cl::init(false), llvm::cl::Hidden);
namespace {

using CodeGenPipeline = IREE::Codegen::DispatchLoweringPassPipeline;
@@ -2340,7 +2348,7 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target,
    LDBG("Tile and fuse data tiled multi_mma config");
    return success();
  }
-  if (clGPUTestTileAndFuseMatmul) {
+  if (clGPUUseTileAndFuseMatmul) {
    if (succeeded(IREE::GPU::setMatmulLoweringConfig(target, entryPointFn,
                                                     computeOp))) {
      LDBG("Tile and fuse matmul config");
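Note on the flag pattern touched by this diff: the matmul flag moves from a `-test-` to a `-use-` prefix with a default of `true`, and the vectorize flag is re-registered as a hidden, testing-only option. The following is a minimal, self-contained sketch of that same `llvm::cl` pattern, not IREE code; the option names and the `main` driver are illustrative assumptions, while `llvm::cl::opt`, `llvm::cl::Hidden`, and `llvm::cl::ParseCommandLineOptions` are the real LLVM Support APIs used by the declarations above.

// Sketch only: illustrative flag names, not the IREE flags above.
#include "llvm/Support/CommandLine.h"

// Visible in --help; analogous to "iree-codegen-llvmgpu-use-tile-and-fuse-matmul",
// which now defaults to true.
static llvm::cl::opt<bool> clUseFeature(
    "example-use-feature",
    llvm::cl::desc("enable the feature pipeline"), llvm::cl::init(true));

// Hidden from --help (listed only by --help-hidden); analogous to the
// testing-only "iree-codegen-llvmgpu-test-tile-and-fuse-vectorize" flag.
static llvm::cl::opt<bool> clTestFeature(
    "example-test-feature",
    llvm::cl::desc("test-only override for the feature pipeline"),
    llvm::cl::init(false), llvm::cl::Hidden);

int main(int argc, char **argv) {
  // Parse command-line flags; hidden options are parsed like any other.
  llvm::cl::ParseCommandLineOptions(argc, argv, "cl::opt sketch\n");
  // Parsed values read back as ordinary booleans at configuration time.
  return (clUseFeature && !clTestFeature) ? 0 : 1;
}

A hidden option still parses normally when passed explicitly (e.g. --example-test-feature=true) but does not clutter --help, which matches the intent of keeping clGPUTestTileAndFuseVectorize as a testing-only knob while promoting the matmul path to an on-by-default flag.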