Skip to content

Commit a9c24fd

Browse files
committed
Auto merge of #43488 - Florob:repeat-opt, r=arielb1
Optimize initialization of arrays using repeat expressions. This PR was inspired by [this thread](https://www.reddit.com/r/rust/comments/6o8ok9/understanding_rust_performances_a_newbie_question/) on Reddit. It tries to bring array initialization into the same ballpark as `Vec::from_elem()` for unoptimized builds. For optimized builds, this should relieve LLVM of having to figure out that the construct we generate is in fact a `memset()`. To that end, this emits `llvm.memset()` when: (1) the array is of integer type and all elements are zero (`Vec::from_elem()` also explicitly optimizes for this case), or (2) the array elements are byte-sized. If the array is zero-sized, initialization is omitted entirely.
2 parents dd1df35 + 11d6312 commit a9c24fd

File tree

4 files changed

+108
-4
lines changed

4 files changed

+108
-4
lines changed

src/librustc_trans/common.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ pub fn const_to_uint(v: ValueRef) -> u64 {
366366
}
367367
}
368368

369-
fn is_const_integral(v: ValueRef) -> bool {
369+
pub fn is_const_integral(v: ValueRef) -> bool {
370370
unsafe {
371371
!llvm::LLVMIsAConstantInt(v).is_null()
372372
}

src/librustc_trans/mir/rvalue.rs

+32-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use base;
2020
use builder::Builder;
2121
use callee;
2222
use common::{self, val_ty, C_bool, C_null, C_uint};
23-
use common::{C_integral};
23+
use common::{C_integral, C_i32};
2424
use adt;
2525
use machine;
2626
use monomorphize;
@@ -93,12 +93,42 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
9393
}
9494

9595
mir::Rvalue::Repeat(ref elem, ref count) => {
96+
let dest_ty = dest.ty.to_ty(bcx.tcx());
97+
98+
// No need to initialize memory of a zero-sized slice
99+
if common::type_is_zero_size(bcx.ccx, dest_ty) {
100+
return bcx;
101+
}
102+
96103
let tr_elem = self.trans_operand(&bcx, elem);
97104
let size = count.as_u64(bcx.tcx().sess.target.uint_type);
98105
let size = C_uint(bcx.ccx, size);
99106
let base = base::get_dataptr(&bcx, dest.llval);
107+
let align = dest.alignment.to_align();
108+
109+
if let OperandValue::Immediate(v) = tr_elem.val {
110+
// Use llvm.memset.p0i8.* to initialize all zero arrays
111+
if common::is_const_integral(v) && common::const_to_uint(v) == 0 {
112+
let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
113+
let align = C_i32(bcx.ccx, align as i32);
114+
let ty = type_of::type_of(bcx.ccx, dest_ty);
115+
let size = machine::llsize_of(bcx.ccx, ty);
116+
let fill = C_integral(Type::i8(bcx.ccx), 0, false);
117+
base::call_memset(&bcx, base, fill, size, align, false);
118+
return bcx;
119+
}
120+
121+
// Use llvm.memset.p0i8.* to initialize byte arrays
122+
if common::val_ty(v) == Type::i8(bcx.ccx) {
123+
let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
124+
let align = C_i32(bcx.ccx, align as i32);
125+
base::call_memset(&bcx, base, v, size, align, false);
126+
return bcx;
127+
}
128+
}
129+
100130
tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| {
101-
self.store_operand(bcx, llslot, dest.alignment.to_align(), tr_elem);
131+
self.store_operand(bcx, llslot, align, tr_elem);
102132
bcx.br(loop_bb);
103133
})
104134
}

src/librustc_trans/tvec.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ pub fn slice_for_each<'a, 'tcx, F>(
3030
};
3131

3232
let body_bcx = bcx.build_sibling_block("slice_loop_body");
33-
let next_bcx = bcx.build_sibling_block("slice_loop_next");
3433
let header_bcx = bcx.build_sibling_block("slice_loop_header");
34+
let next_bcx = bcx.build_sibling_block("slice_loop_next");
3535

3636
let start = if zst {
3737
C_uint(bcx.ccx, 0usize)

src/test/codegen/slice-init.rs

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// compile-flags: -C no-prepopulate-passes
12+
13+
#![crate_type = "lib"]
14+
15+
// CHECK-LABEL: @zero_sized_elem
16+
#[no_mangle]
17+
pub fn zero_sized_elem() {
18+
// CHECK-NOT: br label %slice_loop_header{{.*}}
19+
// CHECK-NOT: call void @llvm.memset.p0i8
20+
let x = [(); 4];
21+
drop(&x);
22+
}
23+
24+
// CHECK-LABEL: @zero_len_array
25+
#[no_mangle]
26+
pub fn zero_len_array() {
27+
// CHECK-NOT: br label %slice_loop_header{{.*}}
28+
// CHECK-NOT: call void @llvm.memset.p0i8
29+
let x = [4; 0];
30+
drop(&x);
31+
}
32+
33+
// CHECK-LABEL: @byte_array
34+
#[no_mangle]
35+
pub fn byte_array() {
36+
// CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 7, i[[WIDTH]] 4
37+
// CHECK-NOT: br label %slice_loop_header{{.*}}
38+
let x = [7u8; 4];
39+
drop(&x);
40+
}
41+
42+
#[allow(dead_code)]
43+
#[derive(Copy, Clone)]
44+
enum Init {
45+
Loop,
46+
Memset,
47+
}
48+
49+
// CHECK-LABEL: @byte_enum_array
50+
#[no_mangle]
51+
pub fn byte_enum_array() {
52+
// CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 {{.*}}, i[[WIDTH]] 4
53+
// CHECK-NOT: br label %slice_loop_header{{.*}}
54+
let x = [Init::Memset; 4];
55+
drop(&x);
56+
}
57+
58+
// CHECK-LABEL: @zeroed_integer_array
59+
#[no_mangle]
60+
pub fn zeroed_integer_array() {
61+
// CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 0, i[[WIDTH]] 16
62+
// CHECK-NOT: br label %slice_loop_header{{.*}}
63+
let x = [0u32; 4];
64+
drop(&x);
65+
}
66+
67+
// CHECK-LABEL: @nonzero_integer_array
68+
#[no_mangle]
69+
pub fn nonzero_integer_array() {
70+
// CHECK: br label %slice_loop_header{{.*}}
71+
// CHECK-NOT: call void @llvm.memset.p0i8
72+
let x = [0x1a_2b_3c_4d_u32; 4];
73+
drop(&x);
74+
}

0 commit comments

Comments
 (0)