
Commit d99c999

[RISCV][VLOPT] Add support for mask-register logical instructions and set mask instructions (#112231)
We need to adjust getEMULEqualsEEWDivSEWTimesLMUL to account for the fact that Log2EEW for mask instructions is 0 but their EMUL is calculated using Log2EEW=3.
1 parent 3f136f7 commit d99c999

3 files changed: +399 −0 lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp (+46)
@@ -159,6 +159,12 @@ getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
   auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
   unsigned MILog2SEW =
       MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+
+  // Mask instructions will have 0 as the SEW operand. But the LMUL of these
+  // instructions is calculated as if the SEW operand were 3 (e8).
+  if (MILog2SEW == 0)
+    MILog2SEW = 3;
+
   unsigned MISEW = 1 << MILog2SEW;
 
   unsigned EEW = 1 << Log2EEW;
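
To see why the clamp matters, here is a small standalone sketch of the EEW/SEW ratio computation, simplified to integral LMUL = 1 (the real function also decodes fractional LMUL from the instruction; `emulRatio` and the variable names below are illustrative, not LLVM's):

```cpp
#include <cstdio>
#include <utility>

// Simplified model of EMUL = (EEW / SEW) * LMUL, with LMUL fixed at 1.
// Returns the ratio as {numerator, denominator}.
static std::pair<unsigned, unsigned> emulRatio(unsigned Log2EEW,
                                               unsigned MILog2SEW) {
  if (MILog2SEW == 0) // Mask instruction: compute EMUL as if SEW were e8.
    MILog2SEW = 3;
  unsigned EEW = 1u << Log2EEW;
  unsigned SEW = 1u << MILog2SEW;
  return EEW >= SEW ? std::make_pair(EEW / SEW, 1u)
                    : std::make_pair(1u, SEW / EEW);
}

int main() {
  // A mask instruction passes Log2EEW = 0 (one bit per element) and carries
  // 0 in its SEW operand. Without the clamp the ratio would be 1/1; with it
  // the ratio is 1/8, i.e. the EMUL the spec derives from e8.
  auto [Num, Den] = emulRatio(/*Log2EEW=*/0, /*MILog2SEW=*/0);
  std::printf("EMUL = %u/%u * LMUL\n", Num, Den); // "EMUL = 1/8 * LMUL"
}
```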
@@ -492,6 +498,29 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
     return OperandInfo(EMUL, Log2EEW);
   }
 
+  // Vector Mask Instructions
+  // Vector Mask-Register Logical Instructions
+  // vmsbf.m set-before-first mask bit
+  // vmsif.m set-including-first mask bit
+  // vmsof.m set-only-first mask bit
+  // EEW=1 and EMUL=(EEW/SEW)*LMUL
+  // We handle the case where the operand is a v0 mask operand above the
+  // switch, but these instructions may use non-v0 mask operands and need to
+  // be handled specifically.
+  case RISCV::VMAND_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMOR_MM:
+  case RISCV::VMNOR_MM:
+  case RISCV::VMORN_MM:
+  case RISCV::VMXNOR_MM:
+  case RISCV::VMSBF_M:
+  case RISCV::VMSIF_M:
+  case RISCV::VMSOF_M: {
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+  }
+
   default:
     return {};
   }
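
Note that both fields of the returned OperandInfo use Log2EEW = 0 here: it is the first argument to getEMULEqualsEEWDivSEWTimesLMUL (so the EMUL ratio is computed from EEW = 1) and it is also the OperandInfo's own recorded EEW. Plugging a mask instruction into the sketch above gives, for example:

```cpp
// Hypothetical use of the emulRatio() sketch above for vmand.mm at LMUL = 1:
// Log2EEW = 0 (mask bit), SEW operand = 0 -> clamped to e8 -> EMUL = 1/8 (mf8).
auto [Num, Den] = emulRatio(0, 0); // Num == 1, Den == 8
```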
@@ -632,6 +661,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
 
   // Vector Crypto
   case RISCV::VWSLL_VI:
+
+  // Vector Mask Instructions
+  // Vector Mask-Register Logical Instructions
+  // vmsbf.m set-before-first mask bit
+  // vmsif.m set-including-first mask bit
+  // vmsof.m set-only-first mask bit
+  case RISCV::VMAND_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMOR_MM:
+  case RISCV::VMNOR_MM:
+  case RISCV::VMORN_MM:
+  case RISCV::VMXNOR_MM:
+  case RISCV::VMSBF_M:
+  case RISCV::VMSIF_M:
+  case RISCV::VMSOF_M:
     return true;
   }

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll (+293)
@@ -2159,3 +2159,296 @@ define <vscale x 1 x i8> @vmerge_vvm(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x
   %3 = call <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %2, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl)
   ret <vscale x 1 x i8> %3
 }
+
+define <vscale x 1 x i32> @vmand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmand_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmand_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmand.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmnand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmnand_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmnand.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmnand_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmnand.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmandn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmandn_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmandn.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmandn_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmandn.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmxor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmxor_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmxor.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmxor_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmxor.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmor_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmor.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmor_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmor.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+
+define <vscale x 1 x i32> @vmnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmnor_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmnor.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmnor_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmnor.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmorn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmorn_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmorn.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmorn_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmorn.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmxnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmxnor_mm:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmxnor.mm v8, v0, v8
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v8
+; NOVLOPT-NEXT: vmv1r.v v8, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmxnor_mm:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmxnor.mm v8, v0, v8
+; VLOPT-NEXT: vmand.mm v0, v0, v8
+; VLOPT-NEXT: vmv1r.v v8, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmsbf_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmsbf_m:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmsbf.m v9, v0
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmsbf_m:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmsbf.m v9, v0
+; VLOPT-NEXT: vmand.mm v0, v0, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsbf.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmsif_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmsif_m:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmsif.m v9, v0
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmsif_m:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmsif.m v9, v0
+; VLOPT-NEXT: vmand.mm v0, v0, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+
+define <vscale x 1 x i32> @vmsof_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmsof_m:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmsof.m v9, v0
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vmand.mm v0, v0, v9
+; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmsof_m:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; VLOPT-NEXT: vmsof.m v9, v0
+; VLOPT-NEXT: vmand.mm v0, v0, v9
+; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
+  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
+  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
+  ret <vscale x 1 x i32> %3
+}
+