Skip to content

Commit 96c4542

Browse files
committed
Avoid multiplications by 1
```
Benchmark #1: ./raytracer_cg_clif_pre
  Time (mean ± σ): 9.553 s ± 0.129 s [User: 9.543 s, System: 0.008 s]
  Range (min … max): 9.438 s … 9.837 s 10 runs

Benchmark #2: ./raytracer_cg_clif_post
  Time (mean ± σ): 9.463 s ± 0.055 s [User: 9.452 s, System: 0.008 s]
  Range (min … max): 9.387 s … 9.518 s 10 runs

Summary
  './raytracer_cg_clif_post' ran
    1.01 ± 0.01 times faster than './raytracer_cg_clif_pre'
```
1 parent 4700926 commit 96c4542

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

src/intrinsics/mod.rs

+20-12
Original file line numberDiff line numberDiff line change
@@ -497,12 +497,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
497497
};
498498
copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
499499
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
500-
let elem_size = fx
501-
.bcx
502-
.ins()
503-
.iconst(fx.pointer_type, elem_size as i64);
504500
assert_eq!(args.len(), 3);
505-
let byte_amount = fx.bcx.ins().imul(count, elem_size);
501+
let byte_amount = if elem_size != 1 {
502+
fx.bcx.ins().imul_imm(count, elem_size as i64)
503+
} else {
504+
count
505+
};
506506

507507
if intrinsic.contains("nonoverlapping") {
508508
// FIXME emit_small_memcpy
@@ -515,12 +515,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
515515
// NOTE: the volatile variants have src and dst swapped
516516
volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
517517
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
518-
let elem_size = fx
519-
.bcx
520-
.ins()
521-
.iconst(fx.pointer_type, elem_size as i64);
522518
assert_eq!(args.len(), 3);
523-
let byte_amount = fx.bcx.ins().imul(count, elem_size);
519+
let byte_amount = if elem_size != 1 {
520+
fx.bcx.ins().imul_imm(count, elem_size as i64)
521+
} else {
522+
count
523+
};
524524

525525
// FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
526526
if intrinsic.contains("nonoverlapping") {
@@ -676,7 +676,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
676676
offset | arith_offset, (c base, v offset) {
677677
let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
678678
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
679-
let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
679+
let ptr_diff = if pointee_size != 1 {
680+
fx.bcx.ins().imul_imm(offset, pointee_size as i64)
681+
} else {
682+
offset
683+
};
680684
let base_val = base.load_scalar(fx);
681685
let res = fx.bcx.ins().iadd(base_val, ptr_diff);
682686
ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
@@ -688,7 +692,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
688692
write_bytes | volatile_set_memory, (c dst, v val, v count) {
689693
let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
690694
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
691-
let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
695+
let count = if pointee_size != 1 {
696+
fx.bcx.ins().imul_imm(count, pointee_size as i64)
697+
} else {
698+
count
699+
};
692700
let dst_ptr = dst.load_scalar(fx);
693701
// FIXME make the memset actually volatile when switching to emit_small_memset
694702
// FIXME use emit_small_memset

0 commit comments

Comments
 (0)