Commit f5f25c3

feat/convolution: add shared workspace for convolution layer
1 parent bd03c7d commit f5f25c3

6 files changed: +73 −15 lines changed

Cargo.toml

+1 −1

@@ -16,7 +16,7 @@ license = "MIT OR Apache-2.0"
 [dependencies]
 collenchyma = { version = "0.0.8", default-features = false }
 collenchyma-blas = { version = "0.2.0", default-features = false }
-collenchyma-nn = { version = "0.3.1", default-features = false }
+collenchyma-nn = { version = "0.3.2", default-features = false }
 
 log = "0.3.2"
 rand = "0.3.0"
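
Note: the bump to collenchyma-nn 0.3.2 presumably provides the convolution API variants that take an explicit workspace argument; the calls in src/layers/common/convolution.rs below now pass `&mut workspace` alongside the convolution config.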

examples/benchmarks.rs

+3
@@ -1,5 +1,6 @@
 #[macro_use]
 extern crate timeit;
+extern crate env_logger;
 extern crate collenchyma as co;
 extern crate leaf;
 
@@ -12,6 +13,8 @@ use std::rc::Rc;
 use std::env;
 
 fn main() {
+    env_logger::init().unwrap();
+
     let nets: Vec<String> = vec!("alexnet".to_string(), "overfeat".to_string(), "vgg".to_string());
     if let Some(net) = env::args().nth(1) {
         if nets.contains(&net) {
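
Note: env_logger is driven by the RUST_LOG environment variable, so an invocation along the lines of `RUST_LOG=debug cargo run --example benchmarks alexnet` (the exact command is an assumption; only the env_logger initialization is part of this diff) should surface the debug! output added elsewhere in the commit.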

src/layer.rs

+15
@@ -192,6 +192,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
 
         self.worker.init(self.backend.clone());
         self.reshape();
+        self.worker.resize_shared_workspace(self.backend.clone(), None);
         for t in &self.output_blobs_data {
             debug!("Layer {} - output shape: {:?}", self.name, t.read().unwrap().desc());
         }
@@ -719,6 +720,20 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
                output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
                output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {}
 
+    /// Adjust the size of the shared workspace.
+    ///
+    /// Used by layers that need a workspace.
+    /// The layer should either:
+    ///
+    /// - leave the workspace as is if it is bigger than required by this layer,
+    /// - resize the workspace to the required size if it is smaller,
+    /// - create the workspace if `workspace` is `None`.
+    ///
+    /// The reference to the workspace should be saved in the layer.
+    fn resize_shared_workspace(&mut self, backend: Rc<B>, workspace: Option<ArcLock<SharedTensor<u8>>>) -> Option<ArcLock<SharedTensor<u8>>> {
+        workspace
+    }
+
     /// Compute the [feedforward][1] layer output using the provided Backend.
     /// [1]: https://en.wikipedia.org/wiki/Feedforward_neural_network
     ///
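
As a reading aid, here is a minimal sketch (not part of the commit) of the keep/grow/create decision the doc comment above asks for, written as a free function. The helper name and `required_size` parameter are placeholders; the `capacity()` and `SharedTensor::<u8>::new(...)` calls mirror the Convolution implementation further down.

    // Sketch only: the keep/grow/create contract from the doc comment above.
    // `keep_grow_or_create` is a made-up helper name; Convolution below inlines
    // the same logic inside its resize_shared_workspace implementation.
    fn keep_grow_or_create<B: IBackend>(backend: &B,
                                        workspace: Option<ArcLock<SharedTensor<u8>>>,
                                        required_size: usize)
                                        -> ArcLock<SharedTensor<u8>> {
        match workspace {
            // keep the existing buffer if it is already big enough
            Some(ws) if ws.read().unwrap().capacity() >= required_size => ws,
            // otherwise allocate a byte buffer of the required size on the backend's device
            _ => Arc::new(RwLock::new(
                SharedTensor::<u8>::new(IBackend::device(backend), &required_size).unwrap(),
            )),
        }
    }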

src/layers/activation/relu.rs

+4
@@ -34,9 +34,13 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ILayer<B> for ReLU {
         if let Some(inp) = input_data.get(0) {
             let read_inp = inp.read().unwrap();
             let input_desc = read_inp.desc();
+            debug!("ONE");
             input_gradient[0].write().unwrap().resize(input_desc).unwrap();
+            debug!("TWO");
             output_data[0].write().unwrap().resize(input_desc).unwrap();
+            debug!("THREE");
             output_gradient[0].write().unwrap().resize(input_desc).unwrap();
+            debug!("FOUR");
         }
     }
 }

src/layers/common/convolution.rs

+37 −14

@@ -3,10 +3,12 @@
 //! Does this convolution with a set of learnable filters, each producing one
 //! feature map in the output tensor.
 use std::rc::Rc;
+use std::sync::{Arc, RwLock};
 use co::prelude::*;
 use conn;
+use conn::ConvolutionConfig as connConvolutionConfig;
 use layer::*;
-use util::{ArcLock, native_backend, cast_vec_usize_to_i32};
+use util::{ArcLock, cast_vec_usize_to_i32};
 use weight::FillerType;
 use super::FilterLayer;
 
@@ -19,7 +21,8 @@ pub struct Convolution<B: conn::Convolution<f32>> {
     stride: Vec<usize>,
     padding: Vec<usize>,
 
-    convolution_configs: Option<Rc<B::CC>>,
+    workspace: Option<ArcLock<SharedTensor<u8>>>,
+    convolution_config: Option<Rc<B::CC>>,
 }
 
 impl<B: conn::Convolution<f32>> Convolution<B> {
@@ -34,7 +37,8 @@ impl<B: conn::Convolution<f32>> Convolution<B> {
 
             axis: config.axis(),
 
-            convolution_configs: None,
+            workspace: None,
+            convolution_config: None,
         }
     }
 
@@ -103,7 +107,7 @@ impl<B: IBackend + conn::Convolution<f32>> ILayer<B> for Convolution<B> {
     }
 
     fn reshape(&mut self,
-               backend: ::std::rc::Rc<B>,
+               backend: Rc<B>,
                input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
                input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
                weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
@@ -125,12 +129,10 @@ impl<B: IBackend + conn::Convolution<f32>> ILayer<B> for Convolution<B> {
            let stride = cast_vec_usize_to_i32(self.stride_dims(num_spatial_dims));
            let padding = cast_vec_usize_to_i32(self.padding_dims(num_spatial_dims));
 
-           // add copy on native as workaround for bug in new_convolution_config
-           let native = native_backend();
-           let _ = filter.add_device(native.device());
            let config = backend.new_convolution_config(&inp, &output_data, &mut filter,
                conn::ConvForwardAlgo::Auto, conn::ConvBackwardFilterAlgo::Auto, conn::ConvBackwardDataAlgo::Auto,
                &stride, &padding).unwrap();
+
            // resize and fill weights
            weights_data[0].write().unwrap().resize(filter.desc()).unwrap();
            let filler = FillerType::Glorot {
@@ -139,9 +141,27 @@ impl<B: IBackend + conn::Convolution<f32>> ILayer<B> for Convolution<B> {
            };
            filler.fill(&mut weights_data[0].write().unwrap());
            weights_gradient[0].write().unwrap().resize(filter.desc()).unwrap();
-           self.convolution_configs = Some(Rc::new(config));
+           self.convolution_config = Some(Rc::new(config));
        }
    }
+
+    fn resize_shared_workspace(&mut self, backend: Rc<B>, workspace: Option<ArcLock<SharedTensor<u8>>>) -> Option<ArcLock<SharedTensor<u8>>> {
+        let required_size = self.convolution_config.as_ref().unwrap().workspace_size();
+        let new_workspace = if workspace.is_none() {
+            Arc::new(RwLock::new(SharedTensor::<u8>::new(IBackend::device(&*backend), &(required_size)).unwrap()))
+        } else {
+            let old_workspace = workspace.as_ref().unwrap().clone();
+            let old_workspace_size = old_workspace.read().unwrap().capacity();
+            if old_workspace_size < required_size {
+                Arc::new(RwLock::new(SharedTensor::<u8>::new(IBackend::device(&*backend), &(required_size)).unwrap()))
+            } else {
+                workspace.unwrap()
+            }
+        };
+
+        self.workspace = Some(new_workspace.clone());
+        Some(new_workspace)
+    }
 }
 
 impl<B: IBackend + conn::Convolution<f32>> ComputeOutput<f32, B> for Convolution<B> {
@@ -151,8 +171,9 @@ impl<B: IBackend + conn::Convolution<f32>> ComputeOutput<f32, B> for Convolution
                      input_data: &[&SharedTensor<f32>],
                      output_data: &mut [&mut SharedTensor<f32>]) {
        let filter_data = weights[0];
-       let conv_config = self.convolution_configs.as_ref().unwrap();
-       backend.convolution_plain(filter_data, input_data[0], output_data[0], conv_config).unwrap();
+       let conv_config = self.convolution_config.as_ref().unwrap();
+       let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();
+       backend.convolution_plain(filter_data, input_data[0], output_data[0], &mut workspace, conv_config).unwrap();
    }
 }
 
@@ -165,9 +186,10 @@ impl<B: IBackend + conn::Convolution<f32>> ComputeInputGradient<f32, B> for Conv
                      input_data: &[&SharedTensor<f32>],
                      input_gradients: &mut [&mut SharedTensor<f32>]) {
        let filter_data = weights_data[0];
-       let conv_config = self.convolution_configs.as_ref().unwrap();
+       let conv_config = self.convolution_config.as_ref().unwrap();
+       let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();
        // compute gradient w.r.t. input
-       backend.convolution_grad_data_plain(filter_data, output_gradients[0], input_gradients[0], conv_config).unwrap();
+       backend.convolution_grad_data_plain(filter_data, output_gradients[0], input_gradients[0], &mut workspace, conv_config).unwrap();
    }
 }
 
@@ -180,9 +202,10 @@ impl<B: IBackend + conn::Convolution<f32>> ComputeParametersGradient<f32, B> for
                      parameters_gradients: &mut [&mut SharedTensor<f32>]) {
        // TODO: compute gradient w.r.t to bias
        let filter_gradient = &mut parameters_gradients[0];
-       let conv_config = self.convolution_configs.as_ref().unwrap();
+       let conv_config = self.convolution_config.as_ref().unwrap();
+       let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();
        // compute gradient w.r.t. filter
-       backend.convolution_grad_filter_plain(input_data[0], output_gradients[0], filter_gradient, conv_config).unwrap();
+       backend.convolution_grad_filter_plain(input_data[0], output_gradients[0], filter_gradient, &mut workspace, conv_config).unwrap();
    }
 }
 

src/layers/common/sequential.rs

+13
@@ -94,8 +94,10 @@ impl<B: IBackend + LayerOps<f32> + 'static> Sequential<B> {
            }
        }
 
+        let mut shared_workspace = None;
        for layer_config in &config.layers {
            self.init_layer(backend.clone(), &layer_config, &mut registry, weight_registry);
+            shared_workspace = self.resize_shared_workspace(backend.clone(), shared_workspace);
        }
 
        // Go through the net backwards to determine which blobs contribute to the
@@ -223,6 +225,17 @@ impl<B: IBackend + LayerOps<f32> + 'static> ILayer<B> for Sequential<B> {
        Some(gradients)
    }
 
+    fn resize_shared_workspace(&mut self, backend: Rc<B>, workspace: Option<ArcLock<SharedTensor<u8>>>) -> Option<ArcLock<SharedTensor<u8>>> {
+        debug!("Resizing shared workspace {:?}", workspace.is_some());
+        let mut shared_workspace = workspace;
+
+        for layer in &self.layers {
+            shared_workspace = layer.borrow_mut().worker.resize_shared_workspace(backend.clone(), shared_workspace);
+        }
+
+        shared_workspace
+    }
+
    fn forward(&self,
               backend: &B,
               input_data: &[ArcLock<SharedTensor<f32>>],
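
The net effect of the init loop above can be traced like this; the layer names are made up, only the threading pattern comes from the commit:

    // Illustration only: how the shared workspace threads through init.
    let mut ws: Option<ArcLock<SharedTensor<u8>>> = None;
    ws = conv1.resize_shared_workspace(backend.clone(), ws); // None -> creates the buffer
    ws = relu1.resize_shared_workspace(backend.clone(), ws); // default impl passes it through untouched
    ws = conv2.resize_shared_workspace(backend.clone(), ws); // reuses it, or replaces it with a bigger one
    // Afterwards both convolution layers hold an Arc to the same byte tensor,
    // sized for the larger of their two workspace requirements.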
