Skip to content

Commit 35d4292

Browse files
committed
[X86][SchedModels] Fix missing ReadAdvance for MULX and ADCX/ADOX (PR51494)
Before this patch, instructions MULX32rm and MULX64rm were missing a ReadAdvance for the implicit read of register EDX/RDX. This patch fixes the issue, and it also introduces a new SchedWrite for the two variants of MULX. The general idea behind this last change is to eventually decrease the number of InstRW in the scheduling models. This patch also adds a ReadAdvance for the implicit read of EFLAGS in ADCX/ADOX. Differential Revision: https://reviews.llvm.org/D108372
1 parent 5cf5df8 commit 35d4292

20 files changed

+137
-144
lines changed

llvm/lib/Target/X86/X86InstrArithmetic.td

+14-5
Original file line numberDiff line numberDiff line change
@@ -1502,8 +1502,12 @@ let hasSideEffects = 0 in {
15021502
let mayLoad = 1 in
15031503
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
15041504
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
1505-
1506-
[]>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
1505+
[]>, T8XD, VEX_4V,
1506+
Sched<[sched.Folded, WriteIMulH,
1507+
// Memory operand.
1508+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1509+
// Implicit read of EDX/RDX
1510+
sched.ReadAfterFold]>;
15071511

15081512
// Pseudo instructions to be used when the low result isn't used. The
15091513
// instruction is defined to keep the high if both destinations are the same.
@@ -1518,9 +1522,9 @@ let hasSideEffects = 0 in {
15181522

15191523
let Predicates = [HasBMI2] in {
15201524
let Uses = [EDX] in
1521-
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
1525+
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
15221526
let Uses = [RDX] in
1523-
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
1527+
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
15241528
}
15251529

15261530
//===----------------------------------------------------------------------===//
@@ -1547,7 +1551,12 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
15471551
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
15481552
} // SchedRW
15491553

1550-
let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
1554+
let mayLoad = 1,
1555+
SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
1556+
// Memory operand.
1557+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
1558+
// Implicit read of EFLAGS
1559+
WriteADC.ReadAfterFold] in {
15511560
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
15521561
(ins GR32:$src1, i32mem:$src2),
15531562
"adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;

llvm/lib/Target/X86/X86SchedBroadwell.td

+2
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
123123
defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
124124
defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
125125
defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
126+
defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
126127
defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
127128
defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
128129
defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
130+
defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 4, [1,1], 2>;
129131
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
130132
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
131133
def : WriteRes<WriteIMulH, []> { let Latency = 3; }

llvm/lib/Target/X86/X86SchedHaswell.td

+2
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,11 @@ defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
140140
defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
141141
defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
142142
defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
143+
defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
143144
defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
144145
defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
145146
defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
147+
defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 4, [1,1], 2>;
146148
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
147149
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
148150
def : WriteRes<WriteIMulH, []> { let Latency = 3; }

llvm/lib/Target/X86/X86SchedSandyBridge.td

+2
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,11 @@ defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
124124
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
125125
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
126126
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
127+
defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
127128
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
128129
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
129130
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
131+
defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 4, [1,1], 2>;
130132
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
131133
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
132134
def : WriteRes<WriteIMulH, []> { let Latency = 3; }

llvm/lib/Target/X86/X86SchedSkylakeClient.td

+2
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,11 @@ defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
122122
defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
123123
defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
124124
defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
125+
defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
125126
defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
126127
defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
127128
defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
129+
defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
128130
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
129131
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
130132
def : WriteRes<WriteIMulH, []> { let Latency = 3; }

llvm/lib/Target/X86/X86SchedSkylakeServer.td

+2
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1
123123
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
124124
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
125125
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
126+
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
126127
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
127128
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
128129
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
130+
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
129131
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
130132
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
131133
def : WriteRes<WriteIMulH, []> { let Latency = 3; }

llvm/lib/Target/X86/X86Schedule.td

+3-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,9 @@ defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by reg
148148
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
149149
defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
150150
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
151-
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
151+
defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
152+
defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
153+
def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by MULX).
152154

153155
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
154156
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.

llvm/lib/Target/X86/X86ScheduleAtom.td

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12]
9191
defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
9292
defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
9393
defm : X86WriteResUnsupported<WriteIMulH>;
94+
defm : X86WriteResPairUnsupported<WriteMULX32>;
95+
defm : X86WriteResPairUnsupported<WriteMULX64>;
9496

9597
defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
9698
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;

llvm/lib/Target/X86/X86ScheduleBdVer2.td

+5-1
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,11 @@ defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
435435
defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
436436
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
437437
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
438-
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
438+
439+
// BMI2 MULX
440+
defm : X86WriteResUnsupported<WriteIMulH>;
441+
defm : X86WriteResPairUnsupported<WriteMULX32>;
442+
defm : X86WriteResPairUnsupported<WriteMULX64>;
439443

440444
defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
441445
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;

llvm/lib/Target/X86/X86ScheduleBtVer2.td

+3-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,9 @@ defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
209209
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
210210
defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
211211
defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
212-
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
212+
defm : X86WriteResUnsupported<WriteIMulH>;
213+
defm : X86WriteResPairUnsupported<WriteMULX32>;
214+
defm : X86WriteResPairUnsupported<WriteMULX64>;
213215

214216
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
215217
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;

llvm/lib/Target/X86/X86ScheduleSLM.td

+3-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
111111
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
112112
defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
113113
defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
114-
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
114+
defm : X86WriteResUnsupported<WriteIMulH>;
115+
defm : X86WriteResPairUnsupported<WriteMULX32>;
116+
defm : X86WriteResPairUnsupported<WriteMULX64>;
115117

116118
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
117119
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;

llvm/lib/Target/X86/X86ScheduleZnver1.td

+7-28
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,9 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
256256
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
257257

258258
// IMULH
259-
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
260-
let Latency = 4;
259+
def : WriteRes<WriteIMulH, [ZnMultiplier]>{
260+
let Latency = 3;
261+
let NumMicroOps = 0;
261262
}
262263

263264
// Floating point operations
@@ -659,32 +660,10 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
659660
}
660661
def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
661662

662-
// MULX.
663-
// r32,r32,r32.
664-
def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
665-
let Latency = 3;
666-
let ResourceCycles = [1, 2];
667-
}
668-
def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;
669-
670-
// r32,r32,m32.
671-
def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
672-
let Latency = 8;
673-
let ResourceCycles = [1, 2, 2];
674-
}
675-
def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
676-
677-
// r64,r64,r64.
678-
def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
679-
let Latency = 3;
680-
}
681-
def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;
682-
683-
// r64,r64,m64.
684-
def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
685-
let Latency = 8;
686-
}
687-
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
663+
// MULX
664+
// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
665+
defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
666+
defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
688667

689668
//-- Control transfer instructions --//
690669

llvm/lib/Target/X86/X86ScheduleZnver2.td

+6-27
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,9 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
243243
defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
244244

245245
// IMULH
246-
def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
247-
let Latency = 4;
246+
def : WriteRes<WriteIMulH, [Zn2Multiplier]>{
247+
let Latency = 3;
248+
let NumMicroOps = 0;
248249
}
249250

250251
// Floating point operations
@@ -658,31 +659,9 @@ def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
658659
def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
659660

660661
// MULX.
661-
// r32,r32,r32.
662-
def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
663-
let Latency = 3;
664-
let ResourceCycles = [1, 2];
665-
}
666-
def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
667-
668-
// r32,r32,m32.
669-
def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
670-
let Latency = 7;
671-
let ResourceCycles = [1, 2, 2];
672-
}
673-
def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
674-
675-
// r64,r64,r64.
676-
def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
677-
let Latency = 3;
678-
}
679-
def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
680-
681-
// r64,r64,m64.
682-
def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
683-
let Latency = 7;
684-
}
685-
def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
662+
// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
663+
defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
664+
defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
686665

687666
//-- Control transfer instructions --//
688667

llvm/lib/Target/X86/X86ScheduleZnver3.td

+8-2
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@ defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/
617617
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
618618
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
619619
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
620+
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 4, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
620621

621622
def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
622623
let Latency = 4;
@@ -630,11 +631,14 @@ def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
630631
let ResourceCycles = [1, 1, 2];
631632
let NumMicroOps = Zn3MULX32rr.NumMicroOps;
632633
}
633-
def : InstRW<[Zn3MULX32rm, WriteIMulH], (instrs MULX32rm)>;
634+
def : InstRW<[Zn3MULX32rm, WriteIMulH,
635+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
636+
ReadAfterLd], (instrs MULX32rm)>;
634637

635638
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
636639
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
637640
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
641+
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 4, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
638642

639643
def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> {
640644
let Latency = 4;
@@ -648,7 +652,9 @@ def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
648652
let ResourceCycles = [1, 1, 2];
649653
let NumMicroOps = Zn3MULX64rr.NumMicroOps;
650654
}
651-
def : InstRW<[Zn3MULX64rm, WriteIMulH], (instrs MULX64rm)>;
655+
def : InstRW<[Zn3MULX64rm, WriteIMulH,
656+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
657+
ReadAfterLd], (instrs MULX64rm)>;
652658

653659
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
654660
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.

llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s

+14-14
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ adox (%rdi), %rcx
1515

1616
# CHECK: Iterations: 2
1717
# CHECK-NEXT: Instructions: 2
18-
# CHECK-NEXT: Total Cycles: 17
18+
# CHECK-NEXT: Total Cycles: 12
1919
# CHECK-NEXT: Total uOps: 6
2020

2121
# CHECK: Dispatch Width: 4
22-
# CHECK-NEXT: uOps Per Cycle: 0.35
23-
# CHECK-NEXT: IPC: 0.12
22+
# CHECK-NEXT: uOps Per Cycle: 0.50
23+
# CHECK-NEXT: IPC: 0.17
2424
# CHECK-NEXT: Block RThroughput: 0.8
2525

2626
# CHECK: Instruction Info:
@@ -55,11 +55,11 @@ adox (%rdi), %rcx
5555
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - adcxq (%rdi), %rcx
5656

5757
# CHECK: Timeline view:
58-
# CHECK-NEXT: 0123456
58+
# CHECK-NEXT: 01
5959
# CHECK-NEXT: Index 0123456789
6060

61-
# CHECK: [0,0] DeeeeeeeER. .. adcxq (%rdi), %rcx
62-
# CHECK-NEXT: [1,0] .D======eeeeeeeER adcxq (%rdi), %rcx
61+
# CHECK: [0,0] DeeeeeeeER.. adcxq (%rdi), %rcx
62+
# CHECK-NEXT: [1,0] .D=eeeeeeeER adcxq (%rdi), %rcx
6363

6464
# CHECK: Average Wait times (based on the timeline view):
6565
# CHECK-NEXT: [0]: Executions
@@ -68,18 +68,18 @@ adox (%rdi), %rcx
6868
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
6969

7070
# CHECK: [0] [1] [2] [3]
71-
# CHECK-NEXT: 0. 2 4.0 0.5 0.0 adcxq (%rdi), %rcx
71+
# CHECK-NEXT: 0. 2 1.5 0.5 0.0 adcxq (%rdi), %rcx
7272

7373
# CHECK: [1] Code Region
7474

7575
# CHECK: Iterations: 2
7676
# CHECK-NEXT: Instructions: 2
77-
# CHECK-NEXT: Total Cycles: 17
77+
# CHECK-NEXT: Total Cycles: 12
7878
# CHECK-NEXT: Total uOps: 6
7979

8080
# CHECK: Dispatch Width: 4
81-
# CHECK-NEXT: uOps Per Cycle: 0.35
82-
# CHECK-NEXT: IPC: 0.12
81+
# CHECK-NEXT: uOps Per Cycle: 0.50
82+
# CHECK-NEXT: IPC: 0.17
8383
# CHECK-NEXT: Block RThroughput: 0.8
8484

8585
# CHECK: Instruction Info:
@@ -114,11 +114,11 @@ adox (%rdi), %rcx
114114
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - adoxq (%rdi), %rcx
115115

116116
# CHECK: Timeline view:
117-
# CHECK-NEXT: 0123456
117+
# CHECK-NEXT: 01
118118
# CHECK-NEXT: Index 0123456789
119119

120-
# CHECK: [0,0] DeeeeeeeER. .. adoxq (%rdi), %rcx
121-
# CHECK-NEXT: [1,0] .D======eeeeeeeER adoxq (%rdi), %rcx
120+
# CHECK: [0,0] DeeeeeeeER.. adoxq (%rdi), %rcx
121+
# CHECK-NEXT: [1,0] .D=eeeeeeeER adoxq (%rdi), %rcx
122122

123123
# CHECK: Average Wait times (based on the timeline view):
124124
# CHECK-NEXT: [0]: Executions
@@ -127,4 +127,4 @@ adox (%rdi), %rcx
127127
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
128128

129129
# CHECK: [0] [1] [2] [3]
130-
# CHECK-NEXT: 0. 2 4.0 0.5 0.0 adoxq (%rdi), %rcx
130+
# CHECK-NEXT: 0. 2 1.5 0.5 0.0 adoxq (%rdi), %rcx

0 commit comments

Comments
 (0)