Skip to content

Commit 1c4573c

Browse files
committed
cpu/drcbearm64.cpp: Fix issues with and optimise ADD/ADDC code generation.
1 parent 0de555c commit 1c4573c

File tree

1 file changed

+85
-24
lines changed

1 file changed

+85
-24
lines changed

src/devices/cpu/drcbearm64.cpp

+85-24
Original file line number | Diff line number | Diff line change
@@ -490,7 +490,7 @@ class drcbe_arm64 : public drcbe_interface
490490
void op_sext(asmjit::a64::Assembler &a, const uml::instruction &inst);
491491
void op_roland(asmjit::a64::Assembler &a, const uml::instruction &inst);
492492
void op_rolins(asmjit::a64::Assembler &a, const uml::instruction &inst);
493-
template <asmjit::a64::Inst::Id Opcode> void op_add(asmjit::a64::Assembler &a, const uml::instruction &inst);
493+
template <bool CarryIn> void op_add(asmjit::a64::Assembler &a, const uml::instruction &inst);
494494
template <bool CarryIn> void op_sub(asmjit::a64::Assembler &a, const uml::instruction &inst);
495495
void op_cmp(asmjit::a64::Assembler &a, const uml::instruction &inst);
496496
void op_mulu(asmjit::a64::Assembler &a, const uml::instruction &inst);
@@ -646,8 +646,8 @@ const drcbe_arm64::opcode_table_entry drcbe_arm64::s_opcode_table_source[] =
646646
{ uml::OP_SEXT, &drcbe_arm64::op_sext }, // SEXT dst,src
647647
{ uml::OP_ROLAND, &drcbe_arm64::op_roland }, // ROLAND dst,src1,src2,src3
648648
{ uml::OP_ROLINS, &drcbe_arm64::op_rolins }, // ROLINS dst,src1,src2,src3
649-
{ uml::OP_ADD, &drcbe_arm64::op_add<a64::Inst::kIdAdds> }, // ADD dst,src1,src2[,f]
650-
{ uml::OP_ADDC, &drcbe_arm64::op_add<a64::Inst::kIdAdcs> }, // ADDC dst,src1,src2[,f]
649+
{ uml::OP_ADD, &drcbe_arm64::op_add<false> }, // ADD dst,src1,src2[,f]
650+
{ uml::OP_ADDC, &drcbe_arm64::op_add<true> }, // ADDC dst,src1,src2[,f]
651651
{ uml::OP_SUB, &drcbe_arm64::op_sub<false> }, // SUB dst,src1,src2[,f]
652652
{ uml::OP_SUBB, &drcbe_arm64::op_sub<true> }, // SUBB dst,src1,src2[,f]
653653
{ uml::OP_CMP, &drcbe_arm64::op_cmp }, // CMP src1,src2[,f]
@@ -3227,51 +3227,112 @@ void drcbe_arm64::op_rolins(a64::Assembler &a, const uml::instruction &inst)
32273227
a.tst(dst, dst);
32283228
}
32293229

3230-
template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, const uml::instruction &inst)
3230+
template <bool CarryIn> void drcbe_arm64::op_add(a64::Assembler &a, const uml::instruction &inst)
32313231
{
32323232
assert(inst.size() == 4 || inst.size() == 8);
32333233
assert_no_condition(inst);
32343234
assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
32353235

3236+
const a64::Inst::Id opcode = CarryIn ? a64::Inst::kIdAdcs : a64::Inst::kIdAdds;
3237+
32363238
be_parameter dstp(*this, inst.param(0), PTYPE_MR);
32373239
be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
32383240
be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
32393241

3242+
const a64::Gp zero = select_register(a64::xzr, inst.size());
3243+
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
3244+
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
32403245
const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size());
32413246

3242-
if (Opcode == a64::Inst::kIdAdcs)
3247+
if (CarryIn)
32433248
load_carry(a);
32443249

3245-
if (src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
3250+
if (src1p.is_immediate_value(0))
32463251
{
3247-
const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size());
3248-
3249-
mov_reg_param(a, inst.size(), src, src2p);
3250-
if (src1p.immediate() == 0)
3251-
a.emit(Opcode, output, src, select_register(a64::xzr, inst.size()));
3252+
if (src2p.is_immediate_value(0))
3253+
{
3254+
if (CarryIn)
3255+
{
3256+
a.emit(opcode, output, zero, zero);
3257+
mov_param_reg(a, inst.size(), dstp, output);
3258+
}
3259+
else
3260+
{
3261+
mov_param_reg(a, inst.size(), dstp, zero);
3262+
a.emit(opcode, zero, zero, zero);
3263+
}
3264+
}
3265+
else if (!CarryIn && src2.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
3266+
{
3267+
a.emit(opcode, output, zero, src2p.immediate());
3268+
mov_param_reg(a, inst.size(), dstp, output);
3269+
}
3270+
else if (!CarryIn && src2.is_immediate() && is_valid_immediate(src2p.immediate(), 24))
3271+
{
3272+
a.emit(opcode, output, zero, src2p.immediate() & (util::make_bitmask<uint64_t>(12) << 12));
3273+
a.emit(opcode, output, output, src2p.immediate() & util::make_bitmask<uint64_t>(12));
3274+
mov_param_reg(a, inst.size(), dstp, output);
3275+
}
32523276
else
3253-
a.emit(Opcode, output, src, src1p.immediate());
3254-
mov_param_reg(a, inst.size(), dstp, output);
3277+
{
3278+
mov_reg_param(a, inst.size(), src2, src2p);
3279+
a.emit(opcode, output, src2, zero);
3280+
mov_param_reg(a, inst.size(), dstp, output);
3281+
}
32553282
}
3256-
else if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
3283+
else if (src2p.is_immediate_value(0))
32573284
{
3258-
const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size());
3259-
3260-
mov_reg_param(a, inst.size(), src, src1p);
3261-
if (src2p.is_immediate_value(0))
3262-
a.emit(Opcode, output, src, select_register(a64::xzr, inst.size()));
3285+
if (!CarryIn && src1.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
3286+
{
3287+
a.emit(opcode, output, zero, src1p.immediate());
3288+
mov_param_reg(a, inst.size(), dstp, output);
3289+
}
3290+
else if (!CarryIn && src1.is_immediate() && is_valid_immediate(src1p.immediate(), 24))
3291+
{
3292+
a.emit(opcode, output, zero, src1p.immediate() & (util::make_bitmask<uint64_t>(12) << 12));
3293+
a.emit(opcode, output, output, src1p.immediate() & util::make_bitmask<uint64_t>(12));
3294+
mov_param_reg(a, inst.size(), dstp, output);
3295+
}
32633296
else
3264-
a.emit(Opcode, output, src, src2p.immediate());
3297+
{
3298+
mov_reg_param(a, inst.size(), src1, src1p);
3299+
a.emit(opcode, output, src1, zero);
3300+
mov_param_reg(a, inst.size(), dstp, output);
3301+
}
3302+
}
3303+
else if (!CarryIn && src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
3304+
{
3305+
mov_reg_param(a, inst.size(), src2, src2p);
3306+
a.emit(opcode, output, src2, src1p.immediate());
3307+
mov_param_reg(a, inst.size(), dstp, output);
3308+
}
3309+
else if (!CarryIn && src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
3310+
{
3311+
mov_reg_param(a, inst.size(), src1, src1p);
3312+
a.emit(opcode, output, src1, src2p.immediate());
3313+
mov_param_reg(a, inst.size(), dstp, output);
3314+
}
3315+
else if (!CarryIn && !inst.flags() && src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 24))
3316+
{
3317+
// only used when no flags are requested: the two-step add still alters flags, but carry and overflow reflect just the second (low 12 bits) addition, not the full sum
3318+
mov_reg_param(a, inst.size(), src2, src2p);
3319+
a.emit(opcode, output, src2, src1p.immediate() & (util::make_bitmask<uint64_t>(12) << 12));
3320+
a.emit(opcode, output, output, src1p.immediate() & util::make_bitmask<uint64_t>(12));
3321+
mov_param_reg(a, inst.size(), dstp, output);
3322+
}
3323+
else if (!CarryIn && !inst.flags() && src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 24))
3324+
{
3325+
// only used when no flags are requested: the two-step add still alters flags, but carry and overflow reflect just the second (low 12 bits) addition, not the full sum
3326+
mov_reg_param(a, inst.size(), src1, src1p);
3327+
a.emit(opcode, output, src1, src2p.immediate() & (util::make_bitmask<uint64_t>(12) << 12));
3328+
a.emit(opcode, output, output, src2p.immediate() & util::make_bitmask<uint64_t>(12));
32653329
mov_param_reg(a, inst.size(), dstp, output);
32663330
}
32673331
else
32683332
{
3269-
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
3270-
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
3271-
32723333
mov_reg_param(a, inst.size(), src1, src1p);
32733334
mov_reg_param(a, inst.size(), src2, src2p);
3274-
a.emit(Opcode, output, src1, src2);
3335+
a.emit(opcode, output, src1, src2);
32753336
mov_param_reg(a, inst.size(), dstp, output);
32763337
}
32773338

0 commit comments

Comments
 (0)