Skip to content

Commit 64c45d1

Browse files
committed
Merge branch 'master' into wip/tanh
2 parents b1d5ec9 + 520f874 commit 64c45d1

File tree

4 files changed

+60
-54
lines changed

4 files changed

+60
-54
lines changed

benches/network_benches.rs

+12-22
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,8 @@ mod cuda {
1414
use std::sync::{Arc, RwLock};
1515
use leaf::layers::*;
1616
use leaf::layer::*;
17-
use leaf::network::*;
1817
use std::rc::Rc;
1918

20-
#[cfg(feature = "native")]
21-
fn native_backend() -> Rc<Backend<Native>> {
22-
Rc::new(Backend::<Native>::default().unwrap())
23-
}
24-
2519
#[cfg(feature = "cuda")]
2620
fn cuda_backend() -> Rc<Backend<Cuda>> {
2721
Rc::new(Backend::<Cuda>::default().unwrap())
@@ -76,7 +70,7 @@ mod cuda {
7670
#[ignore]
7771
#[cfg(feature = "cuda")]
7872
fn bench_mnsit_forward_1(b: &mut Bencher) {
79-
let mut cfg = NetworkConfig::default();
73+
let mut cfg = SequentialConfig::default();
8074
// set up input
8175
cfg.add_input("in", &vec![1, 30, 30]);
8276
cfg.add_input("label", &vec![1, 1, 10]);
@@ -98,18 +92,14 @@ mod cuda {
9892
// cfg.add_layer(loss_cfg);
9993

10094
let backend = cuda_backend();
101-
let native_backend = native_backend();
102-
let mut network = Network::from_config(backend.clone(), &cfg);
103-
let loss = &mut 0f32;
95+
let mut network = Layer::from_config(
96+
backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg)));
10497

10598
let _ = timeit_loops!(10, {
10699
let inp = SharedTensor::<f32>::new(backend.device(), &vec![1, 30, 30]).unwrap();
107-
let label = SharedTensor::<f32>::new(native_backend.device(), &vec![1, 1, 10]).unwrap();
108-
109100
let inp_lock = Arc::new(RwLock::new(inp));
110-
let label_lock = Arc::new(RwLock::new(label));
111101

112-
network.forward(&[inp_lock, label_lock], loss);
102+
network.forward(&[inp_lock]);
113103
});
114104
// b.iter(|| {
115105
// for _ in 0..1 {
@@ -128,7 +118,7 @@ mod cuda {
128118
// #[ignore]
129119
#[cfg(feature = "cuda")]
130120
fn alexnet_forward(b: &mut Bencher) {
131-
let mut cfg = NetworkConfig::default();
121+
let mut cfg = SequentialConfig::default();
132122
// Layer: data
133123
cfg.add_input("data", &vec![128, 3, 224, 224]);
134124
// Layer: conv1
@@ -265,15 +255,15 @@ mod cuda {
265255

266256
let backend = cuda_backend();
267257
// let native_backend = native_backend();
268-
let mut network = Network::from_config(backend.clone(), &cfg);
258+
let mut network = Layer::from_config(
259+
backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg)));
269260

270261
let func = || {
271262
let forward_time = timeit_loops!(1, {
272-
let loss = &mut 0f32;
273263
let inp = SharedTensor::<f32>::new(backend.device(), &vec![128, 3, 112, 112]).unwrap();
274264

275265
let inp_lock = Arc::new(RwLock::new(inp));
276-
network.forward(&[inp_lock], loss);
266+
network.forward(&[inp_lock]);
277267
});
278268
println!("Forward step: {}", forward_time);
279269
};
@@ -285,7 +275,7 @@ mod cuda {
285275
#[cfg(feature = "cuda")]
286276
fn small_alexnet_forward(b: &mut Bencher) {
287277
// let _ = env_logger::init();
288-
let mut cfg = NetworkConfig::default();
278+
let mut cfg = SequentialConfig::default();
289279
// Layer: data
290280
cfg.add_input("data", &vec![128, 3, 112, 112]);
291281
// Layer: conv1
@@ -422,14 +412,14 @@ mod cuda {
422412

423413
let backend = cuda_backend();
424414
// let native_backend = native_backend();
425-
let mut network = Network::from_config(backend.clone(), &cfg);
415+
let mut network = Layer::from_config(
416+
backend.clone(), &LayerConfig::new("network", LayerType::Sequential(cfg)));
426417

427418
let mut func = || {
428-
let loss = &mut 0f32;
429419
let inp = SharedTensor::<f32>::new(backend.device(), &vec![128, 3, 112, 112]).unwrap();
430420

431421
let inp_lock = Arc::new(RwLock::new(inp));
432-
network.forward(&[inp_lock], loss);
422+
network.forward(&[inp_lock]);
433423
};
434424
{ func(); bench_profile(b, func, 10); }
435425
}

src/layer.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -757,15 +757,15 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
757757
output_data: &mut [ArcLock<SharedTensor<f32>>]) {
758758
// aquire all the locks
759759
let inp: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect();
760-
let input_data_: Vec<&SharedTensor<f32>> = inp.iter().enumerate().map(|(_, val)| &**val).collect();
760+
let input_data_: Vec<&SharedTensor<f32>> = inp.iter().map(|val| &**val).collect();
761761

762762
let wgts: Vec<_> = weights_data.iter().map(|w| w.read().unwrap()).collect();
763-
let weights_data_: Vec<&SharedTensor<f32>> = wgts.iter().enumerate().map(|(_, val)| &**val).collect();
763+
let weights_data_: Vec<&SharedTensor<f32>> = wgts.iter().map(|val| &**val).collect();
764764

765765
let out_ref = output_data.iter().cloned().collect::<Vec<_>>();
766766
let mut out = &mut out_ref.iter().map(|b| b.write().unwrap()).collect::<Vec<_>>();
767767
let mut output_w = &mut out.iter_mut().map(|a| a).collect::<Vec<_>>();
768-
let mut output_data_: Vec<&mut SharedTensor<f32>> = output_w.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect();
768+
let mut output_data_: Vec<&mut SharedTensor<f32>> = output_w.iter_mut().map(|val| &mut ***val).collect();
769769

770770
self.compute_output(backend, &weights_data_, &input_data_, &mut output_data_);
771771
}
@@ -786,17 +786,17 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
786786
input_data: &[ArcLock<SharedTensor<f32>>],
787787
input_gradients: &mut [ArcLock<SharedTensor<f32>>]) {
788788
let wgts_data: Vec<_> = weights_data.iter().map(|b| b.read().unwrap()).collect();
789-
let weights_data_: Vec<&SharedTensor<f32>> = wgts_data.iter().enumerate().map(|(_, val)| &**val).collect();
789+
let weights_data_: Vec<&SharedTensor<f32>> = wgts_data.iter().map(|val| &**val).collect();
790790
let out_data: Vec<_> = output_data.iter().map(|b| b.read().unwrap()).collect();
791-
let output_data_: Vec<&SharedTensor<f32>> = out_data.iter().enumerate().map(|(_, val)| &**val).collect();
791+
let output_data_: Vec<&SharedTensor<f32>> = out_data.iter().map(|val| &**val).collect();
792792
let out_gradient: Vec<_> = output_gradients.iter().map(|b| b.read().unwrap()).collect();
793-
let output_gradients_: Vec<&SharedTensor<f32>> = out_gradient.iter().enumerate().map(|(_, val)| &**val).collect();
793+
let output_gradients_: Vec<&SharedTensor<f32>> = out_gradient.iter().map(|val| &**val).collect();
794794
let inp_data: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect();
795-
let input_data_: Vec<&SharedTensor<f32>> = inp_data.iter().enumerate().map(|(_, val)| &**val).collect();
795+
let input_data_: Vec<&SharedTensor<f32>> = inp_data.iter().map(|val| &**val).collect();
796796
let btm_gradient_ref = input_gradients.iter().cloned().collect::<Vec<_>>();
797797
let mut btm_gradient = &mut btm_gradient_ref.iter().map(|b| b.write().unwrap()).collect::<Vec<_>>();
798798
let mut input_gradient = &mut btm_gradient.iter_mut().map(|a| a).collect::<Vec<_>>();
799-
let mut input_gradients_: Vec<&mut SharedTensor<f32>> = input_gradient.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect();
799+
let mut input_gradients_: Vec<&mut SharedTensor<f32>> = input_gradient.iter_mut().map(|val| &mut ***val).collect();
800800

801801
self.compute_input_gradient(backend, &weights_data_, &output_data_, &output_gradients_, &input_data_, &mut input_gradients_);
802802
}
@@ -816,15 +816,15 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
816816
input_data: &[ArcLock<SharedTensor<f32>>],
817817
weights_gradients: &mut [ArcLock<SharedTensor<f32>>]) {
818818
let out_data: Vec<_> = output_data.iter().map(|b| b.read().unwrap()).collect();
819-
let output_data_: Vec<&SharedTensor<f32>> = out_data.iter().enumerate().map(|(_, val)| &**val).collect();
819+
let output_data_: Vec<&SharedTensor<f32>> = out_data.iter().map(|val| &**val).collect();
820820
let out_gradients: Vec<_> = output_gradients.iter().map(|b| b.read().unwrap()).collect();
821-
let output_gradients_: Vec<&SharedTensor<f32>> = out_gradients.iter().enumerate().map(|(_, val)| &**val).collect();
821+
let output_gradients_: Vec<&SharedTensor<f32>> = out_gradients.iter().map(|val| &**val).collect();
822822
let inp_data: Vec<_> = input_data.iter().map(|b| b.read().unwrap()).collect();
823-
let input_data_: Vec<&SharedTensor<f32>> = inp_data.iter().enumerate().map(|(_, val)| &**val).collect();
823+
let input_data_: Vec<&SharedTensor<f32>> = inp_data.iter().map(|val| &**val).collect();
824824
let wgt_gradient_ref = weights_gradients.iter().cloned().collect::<Vec<_>>();
825825
let mut wgt_gradient = &mut wgt_gradient_ref.iter().map(|b| b.write().unwrap()).collect::<Vec<_>>();
826826
let mut weights_gradient = &mut wgt_gradient.iter_mut().map(|a| a).collect::<Vec<_>>();
827-
let mut weights_gradients_: Vec<&mut SharedTensor<f32>> = weights_gradient.iter_mut().enumerate().map(|(_, val)| &mut ***val).collect();
827+
let mut weights_gradients_: Vec<&mut SharedTensor<f32>> = weights_gradient.iter_mut().map(|val| &mut ***val).collect();
828828

829829
self.compute_parameters_gradient(backend, &output_data_, &output_gradients_, &input_data_, &mut weights_gradients_);
830830
}

src/solver/mod.rs

-2
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<f32> +
7070

7171
/// Train the network with one minibatch
7272
pub fn train_minibatch(&mut self, mb_data: ArcLock<SharedTensor<f32>>, mb_target: ArcLock<SharedTensor<f32>>) -> ArcLock<SharedTensor<f32>> {
73-
self.net.clear_weights_gradients();
74-
7573
// forward through network and classifier
7674
let network_out = self.net.forward(&[mb_data])[0].clone();
7775
let _ = self.objective.forward(&[network_out.clone(), mb_target]);

src/solvers/sgd/momentum.rs

+36-18
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::rc::Rc;
2121
use std::sync::{Arc, RwLock};
2222
use util::*;
2323

24-
#[derive(Debug, Clone)]
24+
#[derive(Debug)]
2525
/// Stochastic Gradient Descent with Momentum.
2626
///
2727
/// See [module description][1] for more information.
@@ -31,6 +31,11 @@ pub struct Momentum<SolverB: IBackend + SolverOps<f32>> {
3131
history: Vec<ArcLock<SharedTensor<f32>>>,
3232
/// The backend used for computing the gradient.
3333
backend: Rc<SolverB>,
34+
35+
/// Scalar that temporarily holds learning rate for weight update computations
36+
lr: SharedTensor<f32>,
37+
/// Scalar that temporarily holds momentum for weight update computations
38+
momentum: SharedTensor<f32>,
3439
}
3540

3641
impl<SolverB: IBackend + SolverOps<f32>> Momentum<SolverB> {
@@ -41,9 +46,19 @@ impl<SolverB: IBackend + SolverOps<f32>> Momentum<SolverB> {
4146
///
4247
/// [2]: ../../../solver/struct.Solver.html#method.from_config
4348
pub fn new(backend: Rc<SolverB>) -> Momentum<SolverB> {
49+
let (lr, momentum) = {
50+
let device = IBackend::device(backend.as_ref());
51+
52+
(SharedTensor::<f32>::new(device, &1).unwrap(),
53+
SharedTensor::<f32>::new(device, &1).unwrap())
54+
};
55+
4456
Momentum {
4557
history: Vec::new(),
46-
backend: backend
58+
backend: backend,
59+
60+
lr: lr,
61+
momentum: momentum,
4762
}
4863
}
4964

@@ -56,28 +71,31 @@ impl<B: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<f32> + 'static> SGD
5671
history_blob_id: usize,
5772
global_lr: &f32,
5873
blob_lr: &f32) {
59-
let history_blob = &self.history[history_blob_id];
60-
let local_momentum = config.momentum;
61-
let local_lr = global_lr * blob_lr;
74+
::weight::FillerType::Constant {
75+
value: global_lr * blob_lr
76+
}.fill(&mut self.lr);
77+
78+
::weight::FillerType::Constant {
79+
value: config.momentum
80+
}.fill(&mut self.momentum);
6281

63-
let native_backend = native_backend();
6482
let backend = ISolver::<B, NetB>::backend(self);
6583
let device = IBackend::device(backend);
6684

67-
let lr_shared = native_scalar(local_lr);
68-
let momentum_shared = native_scalar(local_momentum);
85+
let history_blob = &self.history[history_blob_id];
86+
87+
let _ = weight_gradient.write().unwrap().add_device(device);
88+
weight_gradient.write().unwrap().sync(device).unwrap();
89+
let _ = history_blob.write().unwrap().add_device(device);
90+
history_blob.write().unwrap().sync(device).unwrap();
6991

70-
let _ = weight_gradient.write().unwrap().add_device(native_backend.device());
71-
weight_gradient.write().unwrap().sync(native_backend.device()).unwrap();
72-
let _ = history_blob.write().unwrap().add_device(native_backend.device());
73-
history_blob.write().unwrap().sync(native_backend.device()).unwrap();
74-
Axpby::<f32>::axpby_plain(&native_backend,
75-
&lr_shared,
76-
&weight_gradient.read().unwrap(),
77-
&momentum_shared,
78-
&mut history_blob.write().unwrap()).unwrap();
92+
Axpby::axpby_plain(backend,
93+
&self.lr,
94+
&weight_gradient.read().unwrap(),
95+
&self.momentum,
96+
&mut history_blob.write().unwrap()).unwrap();
7997

80-
native_backend.copy_plain(
98+
backend.copy_plain(
8199
&history_blob.read().unwrap(), &mut weight_gradient.write().unwrap()).unwrap();
82100
}
83101
}

0 commit comments

Comments
 (0)