
Commit e20fc95

refactor/sync: convert to the new memory management API
Use .read()/.write_only()/.read_write() instead of .sync()/.add_device()/.get() calls. REFERENCE: autumnai/collenchyma#37, autumnai/collenchyma#62
1 parent 37d1994 commit e20fc95

17 files changed, +126 -210 lines changed
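
As context for the hunks below, here is a minimal sketch of the before/after calling pattern. It is not part of the commit; the crate imports and the Backend::<Native>::default() constructor are assumptions, while the SharedTensor calls are the ones that appear in the diffs.

// Sketch only, not from this commit. Import paths and the backend
// constructor are assumptions; the SharedTensor calls mirror the diffs below.
extern crate collenchyma as co;
use co::prelude::*;

fn scalar_tensor(value: f32) -> SharedTensor<f32> {
    // Assumed constructor; leaf wraps it as `util::native_backend()`.
    let native = Backend::<Native>::default().unwrap();

    // New API: construct from a shape only, no device argument, no Result.
    let mut tensor = SharedTensor::<f32>::new(&[1]);

    // Old API, removed by this refactor (roughly):
    //   let mut tensor = SharedTensor::<f32>::new(native.device(), &vec![1]).unwrap();
    //   tensor.add_device(native.device()).unwrap();
    //   tensor.sync(native.device()).unwrap();
    //   tensor.get_mut(native.device()).unwrap()
    //         .as_mut_native().unwrap().as_mut_slice::<f32>()[0] = value;

    // New API: `write_only` replaces the `add_device` + `sync` + `get_mut`
    // sequence; it hands back memory on the requested device and marks all
    // other copies as outdated.
    tensor.write_only(native.device()).unwrap()
          .as_mut_native().unwrap()
          .as_mut_slice::<f32>()[0] = value;
    tensor
}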

src/layer.rs

+14 -100
@@ -210,8 +210,8 @@ impl<B: IBackend> Layer<B> {
 }

 let backend: Rc<IBackend<F=B::F>> = self.backend.clone();
-blob_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA
-blob_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA
+blob_data = Arc::new(RwLock::new(SharedTensor::new(&[1,1,1]))); // [1,1,1] for CUDA
+blob_gradient = Arc::new(RwLock::new(SharedTensor::new(&[1,1,1]))); // [1,1,1] for CUDA
 }
 self.output_blob_names.push(blob_name.clone());
 self.output_blobs_data.push(blob_data.clone());
@@ -234,8 +234,8 @@ impl<B: IBackend> Layer<B> {
 info!("{} -> {}", self.name, blob_name);

 let backend: Rc<IBackend<F=B::F>> = self.backend.clone();
-let output_data = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA
-let output_gradient = Arc::new(RwLock::new(SharedTensor::new(backend.device(), &vec![1,1,1]).unwrap())); // [1,1,1] for CUDA
+let output_data = Arc::new(RwLock::new(SharedTensor::new(&[1,1,1]))); // [1,1,1] for CUDA
+let output_gradient = Arc::new(RwLock::new(SharedTensor::new(&[1,1,1]))); // [1,1,1] for CUDA
 self.output_blobs_data.push(output_data);
 self.output_blobs_gradient.push(output_gradient);
 }
@@ -264,8 +264,8 @@ impl<B: IBackend> Layer<B> {
 let net_weight_id = weights_len;
 let output_data = self.output_blobs_data[weight_id].read().unwrap();
 debug!("Layer {} - creating weight and gradient of size {:?}", &layer_config.name, output_data.desc());
-let weight_data = Arc::new(RwLock::new(SharedTensor::<f32>::new(output_data.latest_device(), output_data.desc()).unwrap()));
-let weight_gradient = Arc::new(RwLock::new(SharedTensor::<f32>::new(output_data.latest_device(), output_data.desc()).unwrap()));
+let weight_data = Arc::new(RwLock::new(SharedTensor::new(output_data.desc())));
+let weight_gradient = Arc::new(RwLock::new(SharedTensor::new(output_data.desc())));
 self.weights_data.push(weight_data.clone());
 self.weights_gradient.push(weight_gradient.clone());

@@ -460,11 +460,6 @@ impl<B: IBackend> Layer<B> {
 self.input_blobs_data[input_i].write().unwrap().reshape(&reshaped_shape).unwrap();
 }

-self.worker.sync(&self.backend,
-&mut self.input_blobs_data, &mut self.input_blobs_gradient,
-&mut self.weights_data, &mut self.weights_gradient,
-&mut self.output_blobs_data, &mut self.output_blobs_gradient);
-
 let forward_time = timeit_loops!(1, {
 if self.is_using_in_place() {
 self.worker.forward(&self.backend, &vec![], &self.weights_data, &mut self.output_blobs_data);
@@ -497,11 +492,6 @@ impl<B: IBackend> Layer<B> {
 self.output_blobs_gradient[output_i] = output.clone();
 }

-self.worker.sync(&self.backend,
-&mut self.input_blobs_data, &mut self.input_blobs_gradient,
-&mut self.weights_data, &mut self.weights_gradient,
-&mut self.output_blobs_data, &mut self.output_blobs_gradient);
-
 if self.is_using_in_place() {
 self.worker.backward_input(&self.backend,
 &self.weights_data,
@@ -527,11 +517,6 @@ impl<B: IBackend> Layer<B> {
 ///
 /// This method is mostly used when doing backpropagation.
 pub fn backward_parameters(&mut self) {
-self.worker.sync(&self.backend,
-&mut self.input_blobs_data, &mut self.input_blobs_gradient,
-&mut self.weights_data, &mut self.weights_gradient,
-&mut self.output_blobs_data, &mut self.output_blobs_gradient);
-
 self.worker.backward_parameters(&self.backend,
 &self.output_blobs_data,
 &self.output_blobs_gradient,
@@ -553,13 +538,11 @@ impl<B: IBackend> Layer<B> {
 ///
 /// [3]: ../solver/enum.LRPolicy.html
 pub fn update_weights<SolverB: IBackend + ::util::SolverOps<f32>>(&mut self, backend: &SolverB) {
-let mut shared_a = ::util::native_scalar(-1f32);
-let _ = shared_a.add_device(IBackend::device(backend));
-shared_a.sync(IBackend::device(backend)).unwrap();
+// PERF: allocate this scalar once
+let shared_a = ::util::native_scalar(-1f32);
 for (weight_gradient, weight_data) in self.learnable_weights_gradients().iter().zip(&mut self.learnable_weights_data()) {
-weight_gradient.write().unwrap().sync(IBackend::device(backend)).unwrap();
-weight_data.write().unwrap().sync(IBackend::device(backend)).unwrap();
-backend.axpy_plain(&shared_a, &weight_gradient.read().unwrap(), &mut weight_data.write().unwrap()).unwrap();
+backend.axpy(&shared_a, &weight_gradient.read().unwrap(),
+&mut weight_data.write().unwrap()).unwrap();
 }
 }

@@ -695,7 +678,6 @@ impl<B: IBackend> Layer<B> {
 }

 let mut weight_lock = weight.write().unwrap();
-weight_lock.sync(native_backend.device()).unwrap();

 let capnp_tensor = capnp_weight.get_tensor().unwrap();
 let mut shape = Vec::new();
@@ -705,7 +687,7 @@ impl<B: IBackend> Layer<B> {
 }
 weight_lock.reshape(&shape).unwrap();

-let mut native_slice = weight_lock.get_mut(native_backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice::<f32>();
+let mut native_slice = weight_lock.write_only(native_backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice::<f32>();
 let data = capnp_tensor.get_data().unwrap();
 for k in 0..data.len() {
 native_slice[k as usize] = data.get(k);
@@ -814,8 +796,7 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
 let mut capnp_weight = weights.borrow().get(i as u32);
 capnp_weight.set_name(name);

-let mut weight_lock = weight.write().unwrap();
-weight_lock.sync(native_backend.device()).unwrap();
+let weight_lock = weight.write().unwrap();

 let mut tensor = capnp_weight.init_tensor();
 {
@@ -825,7 +806,8 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
 }
 }
 {
-let native_slice = weight_lock.get(native_backend.device()).unwrap().as_native().unwrap().as_slice::<f32>();
+let native_slice = weight_lock.read(native_backend.device())
+.unwrap().as_native().unwrap().as_slice::<f32>();
 let mut tensor_data = tensor.borrow().init_data(native_slice.len() as u32);
 for (i, datum) in native_slice.iter().enumerate() {
 tensor_data.set(i as u32, *datum);
@@ -1025,74 +1007,6 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
 self.compute_parameters_gradient(backend, &output_data_, &output_gradients_, &input_data_, &mut weights_gradients_);
 }

-/// Synchronize the blobs before doing a forward or backward operation.
-///
-/// This is necessary because the forward_layer and backward_layer methods only immutably
-/// borrow the corresponding input blobs and weights which they are not supposed to change.
-/// However synchronizing all blobs to the same device may be neccessary for some computations,
-/// which can only be done with a mutable borrow.
-fn sync(&self,
-backend: &B,
-input_data: &mut [ArcLock<SharedTensor<f32>>],
-input_gradients: &mut [ArcLock<SharedTensor<f32>>],
-weights_data: &mut [ArcLock<SharedTensor<f32>>],
-weights_gradients: &mut [ArcLock<SharedTensor<f32>>],
-output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
-output_gradients: &mut Vec<ArcLock<SharedTensor<f32>>>) {
-if self.sync_native() {
-let backend = native_backend();
-for tensor in input_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in input_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in weights_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in weights_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in output_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in output_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-} else {
-for tensor in input_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in input_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in weights_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in weights_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in output_data {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-for tensor in output_gradients {
-let mut sync = tensor.write().unwrap();
-match sync.add_device(backend.device()) { _ => sync.sync(backend.device()).unwrap() }
-}
-}
-}
-
 /// Return whether "anonymous" output blobs are created automatically for the layer.
 ///
 /// If this method returns true, Network::init will create enough "anonymous" output
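
The serialization and update_weights hunks above all reduce to the same access pattern: request memory on the device you are about to use and let the tensor synchronize lazily. A sketch of that pattern follows (hypothetical helper functions, not code from this commit; native_backend() is leaf's util helper as used in the diff, and the imports from the sketch near the top of the page are assumed):

fn copy_weights_out(weight: &SharedTensor<f32>) -> Vec<f32> {
    let native = native_backend(); // leaf's util helper, as in the diff above
    // `read` replaces the explicit `sync` + `get`: the latest copy is moved
    // to the native device only if it is not already there.
    weight.read(native.device()).unwrap()
          .as_native().unwrap()
          .as_slice::<f32>()
          .to_vec()
}

fn copy_weights_in(weight: &mut SharedTensor<f32>, data: &[f32]) {
    let native = native_backend();
    // As in the hunk above; assumes the element count stays the same.
    weight.reshape(&vec![data.len()]).unwrap();
    // `write_only` replaces `sync` + `get_mut`; because the old contents are
    // overwritten anyway, no device-to-device copy is performed.
    let slice = weight.write_only(native.device()).unwrap()
                      .as_mut_native().unwrap()
                      .as_mut_slice::<f32>();
    for (dst, src) in slice.iter_mut().zip(data) {
        *dst = *src;
    }
}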

src/layers/activation/relu.rs

+6 -6
@@ -56,8 +56,8 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeOutput<f32, B> for ReL
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.relu_plain(input, output_data[0]).unwrap(),
-None => backend.relu_pointwise_plain(output_data[0]).unwrap(),
+Some(input) => backend.relu(input, output_data[0]).unwrap(),
+None => backend.relu_pointwise(output_data[0]).unwrap(),
 }
 }
 }
@@ -72,8 +72,8 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeInputGradient<f32, B>
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.relu_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
-None => backend.relu_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.relu_grad(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+None => backend.relu_pointwise_grad(input_data[0], input_gradients[0]).unwrap(),
 }
 }
 }
@@ -115,7 +115,7 @@ impl<B: IBackend + Relu<f32>> ComputeOutput<f32, B> for ReLU {
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.relu_plain(input, output_data[0]).unwrap(),
+Some(input) => backend.relu(input, output_data[0]).unwrap(),
 None => panic!("No input provided for ReLU layer."),
 }
 }
@@ -131,7 +131,7 @@ impl<B: IBackend + Relu<f32>> ComputeInputGradient<f32, B> for ReLU {
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.relu_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.relu_grad(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
 None => panic!("No output_data provided for ReLU layer backward."),
 }
 }
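
The same rename, dropping the *_plain suffix from the collenchyma-nn ops, repeats in the sigmoid and tanh layers below. As a standalone sketch (trait import paths and the conn alias are assumptions; the method names and argument order are the ones the diff switches to):

// Sketch, not from the commit; `conn` is the assumed collenchyma-nn alias.
fn relu_forward<B: IBackend + conn::Relu<f32>>(backend: &B,
                                               input: &SharedTensor<f32>,
                                               output: &mut SharedTensor<f32>) {
    // The un-suffixed op handles memory access (read()/write_only()) itself,
    // so the former `relu_plain` variant is gone.
    backend.relu(input, output).unwrap();
}

fn relu_backward<B: IBackend + conn::Relu<f32>>(backend: &B,
                                                output: &SharedTensor<f32>,
                                                output_grad: &SharedTensor<f32>,
                                                input: &SharedTensor<f32>,
                                                input_grad: &mut SharedTensor<f32>) {
    // Same argument order as in the hunks above; `relu_grad` replaces
    // `relu_grad_plain`.
    backend.relu_grad(output, output_grad, input, input_grad).unwrap();
}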

src/layers/activation/sigmoid.rs

+8 -6
@@ -60,8 +60,8 @@ impl<B: IBackend + conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>> ComputeOutp
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.sigmoid_plain(input, output_data[0]).unwrap(),
-None => backend.sigmoid_pointwise_plain(output_data[0]).unwrap(),
+Some(input) => backend.sigmoid(input, output_data[0]).unwrap(),
+None => backend.sigmoid_pointwise(output_data[0]).unwrap(),
 }
 }
 }
@@ -76,8 +76,9 @@ impl<B: IBackend + conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>> ComputeInpu
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.sigmoid_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
-None => backend.sigmoid_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.sigmoid_grad(output_data[0], output_gradients[0],
+input_data[0], input_gradients[0]).unwrap(),
+None => backend.sigmoid_pointwise_grad(input_data[0], input_gradients[0]).unwrap(),
 }
 }
 }
@@ -119,7 +120,7 @@ impl<B: IBackend + conn::Sigmoid<f32>> ComputeOutput<f32, B> for Sigmoid {
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.sigmoid_plain(input, output_data[0]).unwrap(),
+Some(input) => backend.sigmoid(input, output_data[0]).unwrap(),
 None => panic!("No input provided for Sigmoid layer."),
 }
 }
@@ -135,7 +136,8 @@ impl<B: IBackend + conn::Sigmoid<f32>> ComputeInputGradient<f32, B> for Sigmoid
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.sigmoid_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.sigmoid_grad(output_data[0], output_gradients[0],
+input_data[0], input_gradients[0]).unwrap(),
 None => panic!("No output_data provided for Sigmoid layer backward."),
 }
 }

src/layers/activation/tanh.rs

+8 -6
@@ -57,8 +57,8 @@ impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeOutput<f32
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
-None => backend.tanh_pointwise_plain(output_data[0]).unwrap(),
+Some(input) => backend.tanh(input, output_data[0]).unwrap(),
+None => backend.tanh_pointwise(output_data[0]).unwrap(),
 }
 }
 }
@@ -73,8 +73,9 @@ impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeInputGradi
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
-None => backend.tanh_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.tanh_grad(output_data[0], output_gradients[0],
+input_data[0], input_gradients[0]).unwrap(),
+None => backend.tanh_pointwise_grad(input_data[0], input_gradients[0]).unwrap(),
 }
 }
 }
@@ -116,7 +117,7 @@ impl<B: IBackend + conn::Tanh<f32>> ComputeOutput<f32, B> for TanH {
 input_data: &[&SharedTensor<f32>],
 output_data: &mut [&mut SharedTensor<f32>]) {
 match input_data.get(0) {
-Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
+Some(input) => backend.tanh(input, output_data[0]).unwrap(),
 None => panic!("No input provided for TanH layer."),
 }
 }
@@ -132,7 +133,8 @@ impl<B: IBackend + conn::Tanh<f32>> ComputeInputGradient<f32, B> for TanH {
 input_data: &[&SharedTensor<f32>],
 input_gradients: &mut [&mut SharedTensor<f32>]) {
 match output_data.get(0) {
-Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+Some(_) => backend.tanh_grad(output_data[0], output_gradients[0],
+input_data[0], input_gradients[0]).unwrap(),
 None => panic!("No output_data provided for TanH layer backward."),
 }
 }
