Commit b1d5ec9

hobofan authored and MichaelHirn committed
feat/layers: add tanh layer
1 parent 3b25a48 commit b1d5ec9

5 files changed, +156 -0 lines changed

src/layer.rs (+7)

@@ -697,6 +697,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
             LayerType::Softmax => Box::new(Softmax::default()),
             LayerType::ReLU => Box::new(ReLU),
             LayerType::Sigmoid => Box::new(Sigmoid),
+            LayerType::TanH => Box::new(TanH),
             LayerType::NegativeLogLikelihood(layer_config) => Box::new(NegativeLogLikelihood::from_config(&layer_config)),
             LayerType::Reshape(layer_config) => Box::new(Reshape::from_config(&layer_config)),
         }
@@ -1123,6 +1124,8 @@ pub enum LayerType {
     ReLU,
     /// Sigmoid Layer
     Sigmoid,
+    /// TanH Layer
+    TanH,
     // Loss layers
     /// NegativeLogLikelihood Layer
     NegativeLogLikelihood(NegativeLogLikelihoodConfig),
@@ -1151,6 +1154,10 @@ impl LayerType {
             LayerType::Sigmoid => true,
             #[cfg(feature="native")]
             LayerType::Sigmoid => false,
+            #[cfg(all(feature="cuda", not(feature="native")))]
+            LayerType::TanH => true,
+            #[cfg(feature="native")]
+            LayerType::TanH => false,
             LayerType::NegativeLogLikelihood(_) => false,
             LayerType::Reshape(_) => true,
         }
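
For context, the new `LayerType::TanH` variant is selected the same way as `ReLU` or `Sigmoid` when assembling a network. A minimal usage sketch, not part of this commit; `LayerConfig::new` and the module path are assumptions based on the rest of the crate and may differ at this revision:

// Usage sketch only (not part of this diff).
use leaf::layer::{LayerConfig, LayerType};

fn tanh_layer_config() -> LayerConfig {
    // TanH is a plain variant, like ReLU and Sigmoid: no extra config struct.
    LayerConfig::new("tanh_activation", LayerType::TanH)
}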

src/layers/activation/mod.rs (+2)

@@ -31,6 +31,8 @@ macro_rules! impl_ilayer_activation {
 
 pub use self::relu::ReLU;
 pub use self::sigmoid::Sigmoid;
+pub use self::tanh::TanH;
 
 pub mod relu;
 pub mod sigmoid;
+pub mod tanh;

src/layers/activation/tanh.rs (+142, new file)

@@ -0,0 +1,142 @@
+//! Applies the nonlinear TanH function.
+//!
+//! Non-linearity activation function: y = sinh(x) / cosh(x)
+//!
+//! You might consider using ReLU as an alternative.
+//!
+//! ReLU, compared to TanH:
+//!
+//! * reduces the likelihood of vanishing gradients
+//! * increases the likelihood of a more beneficial sparse representation
+//! * can be computed faster
+//! * is therefore the most popular activation function in DNNs as of this writing (2016).
+use co::{IBackend, SharedTensor};
+use conn;
+use layer::*;
+use util::ArcLock;
+
+#[derive(Debug, Clone)]
+#[allow(missing_copy_implementations)]
+/// TanH Activation Layer
+pub struct TanH;
+
+//
+// Tanh + TanhPointwise
+// Only on CUDA
+#[cfg(all(feature="cuda", not(feature="native")))]
+impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ILayer<B> for TanH {
+    impl_ilayer_activation!();
+
+    fn compute_in_place(&self) -> bool {
+        true
+    }
+
+    fn reshape(&mut self,
+               backend: ::std::rc::Rc<B>,
+               input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
+        if let Some(inp) = input_data.get(0) {
+            let read_inp = inp.read().unwrap();
+            let input_desc = read_inp.desc();
+            input_gradient[0].write().unwrap().resize(input_desc).unwrap();
+            output_data[0].write().unwrap().resize(input_desc).unwrap();
+            output_gradient[0].write().unwrap().resize(input_desc).unwrap();
+        }
+    }
+}
+
+#[cfg(all(feature="cuda", not(feature="native")))]
+impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeOutput<f32, B> for TanH {
+    fn compute_output(&self,
+                      backend: &B,
+                      _weights: &[&SharedTensor<f32>],
+                      input_data: &[&SharedTensor<f32>],
+                      output_data: &mut [&mut SharedTensor<f32>]) {
+        match input_data.get(0) {
+            Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
+            None => backend.tanh_pointwise_plain(output_data[0]).unwrap(),
+        }
+    }
+}
+
+#[cfg(all(feature="cuda", not(feature="native")))]
+impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeInputGradient<f32, B> for TanH {
+    fn compute_input_gradient(&self,
+                              backend: &B,
+                              weights_data: &[&SharedTensor<f32>],
+                              output_data: &[&SharedTensor<f32>],
+                              output_gradients: &[&SharedTensor<f32>],
+                              input_data: &[&SharedTensor<f32>],
+                              input_gradients: &mut [&mut SharedTensor<f32>]) {
+        match output_data.get(0) {
+            Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+            None => backend.tanh_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
+        }
+    }
+}
+
+#[cfg(all(feature="cuda", not(feature="native")))]
+impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeParametersGradient<f32, B> for TanH {}
+
+//
+// Tanh without TanhPointwise
+// Only on Native
+//
+#[cfg(feature="native")]
+impl<B: IBackend + conn::Tanh<f32>> ILayer<B> for TanH {
+    impl_ilayer_activation!();
+
+    fn reshape(&mut self,
+               backend: ::std::rc::Rc<B>,
+               input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
+        if let Some(inp) = input_data.get(0) {
+            let read_inp = inp.read().unwrap();
+            let input_desc = read_inp.desc();
+            input_gradient[0].write().unwrap().resize(input_desc).unwrap();
+            output_data[0].write().unwrap().resize(input_desc).unwrap();
+            output_gradient[0].write().unwrap().resize(input_desc).unwrap();
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + conn::Tanh<f32>> ComputeOutput<f32, B> for TanH {
+    fn compute_output(&self,
+                      backend: &B,
+                      _weights: &[&SharedTensor<f32>],
+                      input_data: &[&SharedTensor<f32>],
+                      output_data: &mut [&mut SharedTensor<f32>]) {
+        match input_data.get(0) {
+            Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
+            None => panic!("No input provided for TanH layer."),
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + conn::Tanh<f32>> ComputeInputGradient<f32, B> for TanH {
+    fn compute_input_gradient(&self,
+                              backend: &B,
+                              weights_data: &[&SharedTensor<f32>],
+                              output_data: &[&SharedTensor<f32>],
+                              output_gradients: &[&SharedTensor<f32>],
+                              input_data: &[&SharedTensor<f32>],
+                              input_gradients: &mut [&mut SharedTensor<f32>]) {
+        match output_data.get(0) {
+            Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+            None => panic!("No output_data provided for TanH layer backward."),
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + conn::Tanh<f32>> ComputeParametersGradient<f32, B> for TanH {}
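
For reference, the element-wise math behind the backend calls above: the forward pass is y = tanh(x) = sinh(x) / cosh(x), and the derivative can be expressed through the forward output, d tanh(x)/dx = 1 - tanh(x)^2 = 1 - y^2, which is why the tanh_grad call is handed the forward output. A standalone sketch, independent of any backend:

// Standalone reference sketch (not part of the diff).
fn tanh_forward(x: f32) -> f32 {
    // y = sinh(x) / cosh(x)
    x.tanh()
}

fn tanh_backward(y: f32, out_grad: f32) -> f32 {
    // d tanh(x)/dx = 1 - y^2: the input gradient needs only the forward
    // output and the incoming gradient.
    out_grad * (1.0 - y * y)
}

fn main() {
    let x = 0.5f32;
    let y = tanh_forward(x);
    // Check: the derivative via the output matches 1 / cosh(x)^2.
    assert!((tanh_backward(y, 1.0) - 1.0 / x.cosh().powi(2)).abs() < 1e-6);
    println!("tanh({}) = {}, d/dx = {}", x, y, tanh_backward(y, 1.0));
}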

src/layers/mod.rs (+1)

@@ -49,6 +49,7 @@
 pub use self::activation::{
     ReLU,
     Sigmoid,
+    TanH,
 };
 
 #[cfg(all(feature="cuda", not(feature="native")))]

src/util.rs (+4)

@@ -111,12 +111,14 @@ pub trait LayerOps<F> : conn::Convolution<F>
     + conn::Pooling<F>
     + conn::Relu<F> + conn::ReluPointwise<F>
     + conn::Sigmoid<F> + conn::SigmoidPointwise<F>
+    + conn::Tanh<F> + conn::TanhPointwise<F>
     + conn::Softmax<F> + conn::LogSoftmax<F>
     + Gemm<F> {}
 #[cfg(feature="native")]
 /// Encapsulates all traits used in Layers.
 pub trait LayerOps<F> : conn::Relu<F>
     + conn::Sigmoid<F>
+    + conn::Tanh<F>
     + conn::Softmax<F> + conn::LogSoftmax<F>
     + Gemm<F> {}
 
@@ -125,10 +127,12 @@ impl<T: conn::Convolution<f32>
     + conn::Pooling<f32>
     + conn::Relu<f32> + conn::ReluPointwise<f32>
     + conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>
+    + conn::Tanh<f32> + conn::TanhPointwise<f32>
     + conn::Softmax<f32> + conn::LogSoftmax<f32>
     + Gemm<f32>> LayerOps<f32> for T {}
 #[cfg(feature="native")]
 impl<T: conn::Relu<f32>
     + conn::Sigmoid<f32>
+    + conn::Tanh<f32>
     + conn::Softmax<f32> + conn::LogSoftmax<f32>
     + Gemm<f32>> LayerOps<f32> for T {}
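
With the extended bound, code that is generic over `LayerOps<f32>` can call the tanh primitive directly, the same way `compute_output` does in the new layer. A minimal sketch; the helper itself is hypothetical and assumes the same imports as the files above (`use co::{IBackend, SharedTensor};`, `use util::LayerOps;`):

// Hypothetical helper, not part of the commit: any backend satisfying the
// Tanh-extended LayerOps<f32> bound exposes the same tanh_plain call that
// the TanH layer uses in compute_output.
fn apply_tanh<B: IBackend + LayerOps<f32>>(backend: &B,
                                           input: &SharedTensor<f32>,
                                           output: &mut SharedTensor<f32>) {
    backend.tanh_plain(input, output).unwrap();
}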
