|
| 1 | +//! Applies the nonlinear TanH function. |
| 2 | +//! |
| 3 | +//! Non-linearity activation function: y = sinh(x) / cosh(x) |
| 4 | +//! |
| 5 | +//! You might consider using ReLU as an alternative. |
| 6 | +//! |
| 7 | +//! ReLU, compared to TanH: |
| 8 | +//! |
//! * reduces the likelihood of vanishing gradients
//! * increases the likelihood of a more beneficial sparse representation
| 11 | +//! * can be computed faster |
| 12 | +//! * is therefore the most popular activation function in DNNs as of this writing (2016). |
| 13 | +use co::{IBackend, SharedTensor}; |
| 14 | +use conn; |
| 15 | +use layer::*; |
| 16 | +use util::ArcLock; |
| 17 | + |
#[derive(Debug, Clone)]
#[allow(missing_copy_implementations)]
/// TanH Activation Layer
///
/// Stateless marker type: it holds no weights or configuration. The actual
/// computation is delegated to the backend through the `conn::Tanh` (and, on
/// CUDA, `conn::TanhPointwise`) traits in the `impl` blocks below.
pub struct TanH;
| 22 | + |
| 23 | +// |
| 24 | +// Tanh + TanhPointwise |
| 25 | +// Only on CUDA |
#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ILayer<B> for TanH {
    impl_ilayer_activation!();

    /// TanH may overwrite its input buffer: with the pointwise CUDA kernels
    /// the framework can reuse the input blob as the output blob.
    fn compute_in_place(&self) -> bool {
        true
    }

    /// Resizes the input gradient, output data and output gradient blobs to
    /// the shape of the first input blob. TanH has no learnable parameters,
    /// so the weight blobs are left untouched (hence the `_`-prefixed
    /// parameters, which silences `unused_variables` warnings).
    fn reshape(&mut self,
               _backend: ::std::rc::Rc<B>,
               input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
               _weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               _weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
               output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
        if let Some(inp) = input_data.get(0) {
            let read_inp = inp.read().unwrap();
            let input_desc = read_inp.desc();
            input_gradient[0].write().unwrap().resize(input_desc).unwrap();
            output_data[0].write().unwrap().resize(input_desc).unwrap();
            output_gradient[0].write().unwrap().resize(input_desc).unwrap();
        }
    }
}
| 51 | + |
#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeOutput<f32, B> for TanH {
    /// Computes y = tanh(x). When a separate input blob is present the
    /// out-of-place kernel is used; otherwise the output blob is transformed
    /// in place via the pointwise kernel (see `compute_in_place`).
    fn compute_output(&self,
                      backend: &B,
                      _weights: &[&SharedTensor<f32>],
                      input_data: &[&SharedTensor<f32>],
                      output_data: &mut [&mut SharedTensor<f32>]) {
        if let Some(input) = input_data.get(0) {
            backend.tanh_plain(input, output_data[0]).unwrap();
        } else {
            backend.tanh_pointwise_plain(output_data[0]).unwrap();
        }
    }
}
| 65 | + |
#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeInputGradient<f32, B> for TanH {
    /// Computes the input gradient from the layer output and its gradient.
    /// `weights_data` is unused because TanH has no parameters (underscore
    /// prefix silences the `unused_variables` warning).
    fn compute_input_gradient(&self,
                              backend: &B,
                              _weights_data: &[&SharedTensor<f32>],
                              output_data: &[&SharedTensor<f32>],
                              output_gradients: &[&SharedTensor<f32>],
                              input_data: &[&SharedTensor<f32>],
                              input_gradients: &mut [&mut SharedTensor<f32>]) {
        match output_data.get(0) {
            // Out-of-place backward pass: uses the forward output and its gradient.
            Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
            // No separate output blob => in-place (pointwise) backward pass.
            None => backend.tanh_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
        }
    }
}
| 81 | + |
// TanH has no learnable parameters, so the trait's default no-op parameter
// gradient computation is sufficient.
#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeParametersGradient<f32, B> for TanH {}
| 84 | + |
//
// Tanh without TanhPointwise
// Only on Native
//
#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ILayer<B> for TanH {
    impl_ilayer_activation!();

    /// Resizes the input gradient, output data and output gradient blobs to
    /// the shape of the first input blob. TanH has no learnable parameters,
    /// so the weight blobs are left untouched (hence the `_`-prefixed
    /// parameters, which silences `unused_variables` warnings).
    ///
    /// Note: unlike the CUDA variant this impl does not override
    /// `compute_in_place`, because the native backend has no pointwise kernel.
    fn reshape(&mut self,
               _backend: ::std::rc::Rc<B>,
               input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
               _weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               _weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
               output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
               output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
        if let Some(inp) = input_data.get(0) {
            let read_inp = inp.read().unwrap();
            let input_desc = read_inp.desc();
            input_gradient[0].write().unwrap().resize(input_desc).unwrap();
            output_data[0].write().unwrap().resize(input_desc).unwrap();
            output_gradient[0].write().unwrap().resize(input_desc).unwrap();
        }
    }
}
| 110 | + |
#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeOutput<f32, B> for TanH {
    /// Computes y = tanh(x) out of place. Panics when no input blob is
    /// provided, since the native backend cannot operate in place.
    fn compute_output(&self,
                      backend: &B,
                      _weights: &[&SharedTensor<f32>],
                      input_data: &[&SharedTensor<f32>],
                      output_data: &mut [&mut SharedTensor<f32>]) {
        let input = match input_data.get(0) {
            Some(input) => input,
            None => panic!("No input provided for TanH layer."),
        };
        backend.tanh_plain(input, output_data[0]).unwrap();
    }
}
| 124 | + |
#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeInputGradient<f32, B> for TanH {
    /// Computes the input gradient from the layer output and its gradient.
    /// `weights_data` is unused because TanH has no parameters (underscore
    /// prefix silences the `unused_variables` warning). Panics when the
    /// forward output is missing, since the native backend has no in-place
    /// (pointwise) backward kernel to fall back on.
    fn compute_input_gradient(&self,
                              backend: &B,
                              _weights_data: &[&SharedTensor<f32>],
                              output_data: &[&SharedTensor<f32>],
                              output_gradients: &[&SharedTensor<f32>],
                              input_data: &[&SharedTensor<f32>],
                              input_gradients: &mut [&mut SharedTensor<f32>]) {
        match output_data.get(0) {
            Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
            None => panic!("No output_data provided for TanH layer backward."),
        }
    }
}
| 140 | + |
// TanH has no learnable parameters, so the trait's default no-op parameter
// gradient computation is sufficient.
#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeParametersGradient<f32, B> for TanH {}
0 commit comments