Skip to content

Commit c65a9db

Browse files
committed
AMDGPU: Fix missing immarg for mfma intrinsics
llvm-svn: 366230
1 parent 1ff5535 commit c65a9db

File tree

2 files changed

+63
-20
lines changed

2 files changed

+63
-20
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+40 -20
Original file line number | Diff line number | Diff line change
@@ -1675,83 +1675,103 @@ def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
16751675
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
16761676
def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
16771677
[llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
1678-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1678+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1679+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16791680

16801681
def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
16811682
[llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1682-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1683+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1684+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16831685

16841686
def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
16851687
[llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1686-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1688+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1689+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16871690

16881691
def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
16891692
[llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1690-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1693+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1694+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16911695

16921696
def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
16931697
[llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1694-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1698+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1699+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16951700

16961701
def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
16971702
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
1698-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1703+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1704+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
16991705

17001706
def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
17011707
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1702-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1708+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1709+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17031710

17041711
def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
17051712
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1706-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1713+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1714+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17071715

17081716
def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
17091717
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1710-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1718+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1719+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17111720

17121721
def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
17131722
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1714-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1723+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1724+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17151725

17161726
def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
17171727
[llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
1718-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1728+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1729+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17191730

17201731
def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
17211732
[llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1722-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1733+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1734+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17231735

17241736
def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
17251737
[llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1726-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1738+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1739+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17271740

17281741
def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
17291742
[llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1730-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1743+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1744+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17311745

17321746
def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
17331747
[llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1734-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1748+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1749+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17351750

17361751
def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
17371752
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
1738-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1753+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1754+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17391755

17401756
def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
17411757
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
1742-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1758+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1759+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17431760

17441761
def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
17451762
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
1746-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1763+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1764+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17471765

17481766
def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
17491767
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
1750-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1768+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1769+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17511770

17521771
def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
17531772
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
1754-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
1773+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1774+
[IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
17551775

17561776
//===----------------------------------------------------------------------===//
17571777
// Special Intrinsics for backend internal use only. No frontend

llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll

+23
Original file line number | Diff line number | Diff line change
@@ -674,3 +674,26 @@ define void @test_interp_p2_f16(float %arg0, float %arg1, i32 %arg2, i32 %arg3,
674674

675675
ret void
676676
}
677+
678+
declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32)
679+
define void @test_mfma_f32_32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 %arg4, i32 %arg5) {
680+
; CHECK: immarg operand has non-immediate parameter
681+
; CHECK-NEXT: i32 %arg3
682+
; CHECK-NEXT: %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3)
683+
%val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3)
684+
store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef
685+
686+
; CHECK: immarg operand has non-immediate parameter
687+
; CHECK-NEXT: i32 %arg4
688+
; CHECK-NEXT: %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3)
689+
%val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3)
690+
store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef
691+
692+
; CHECK: immarg operand has non-immediate parameter
693+
; CHECK-NEXT: i32 %arg5
694+
; CHECK-NEXT: %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5)
695+
%val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5)
696+
store volatile <32 x i32> %val2, <32 x i32> addrspace(1)* undef
697+
698+
ret void
699+
}

0 commit comments

Comments
 (0)