@@ -1,6 +1,6 @@
 //! Provides the generics and interfaces for the specific [Layers][layers].
 //! [layers]: ../layers/index.html
-use co::{IBackend, ITensorDesc, SharedTensor};
+use co::prelude::*;
 use layers::*;
 use weight::WeightConfig;
 use util::{ArcLock, native_backend, LayerOps};
@@ -610,13 +610,12 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
     /// [3]: ../solver/enum.LRPolicy.html
     pub fn update_weights<SolverB: IBackend + ::util::SolverOps<f32>>(&mut self, backend: &SolverB) {
         let mut shared_a = ::util::native_scalar(-1f32);
-        let _ = shared_a.add_device(backend.device());
-        shared_a.sync(backend.device()).unwrap();
+        let _ = shared_a.add_device(IBackend::device(backend));
+        shared_a.sync(IBackend::device(backend)).unwrap();
         for (weight_gradient, weight_data) in self.learnable_weights_gradients().iter().zip(&mut self.learnable_weights_data()) {
-            weight_gradient.write().unwrap().sync(backend.device()).unwrap();
-            weight_data.write().unwrap().sync(backend.device()).unwrap();
+            weight_gradient.write().unwrap().sync(IBackend::device(backend)).unwrap();
+            weight_data.write().unwrap().sync(IBackend::device(backend)).unwrap();
             backend.axpy_plain(&shared_a, &weight_gradient.read().unwrap(), &mut weight_data.write().unwrap()).unwrap();
-            // weight_blob.write().unwrap().apply_diff(backend) // TODO: solver
         }
     }
 
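For reference, the loop above performs a plain gradient-descent step: with shared_a holding -1.0, the axpy_plain call computes weight = weight + (-1.0) * gradient for every learnable weight. A minimal stand-alone sketch of that arithmetic on plain slices (illustrative only, not the collenchyma API; the function name and values below are assumptions):

    // w <- w + a * g, the update that `axpy_plain(&shared_a, ..)` performs
    // per weight blob when `shared_a` holds -1.0 (a fixed step size of 1).
    fn axpy_inplace(a: f32, g: &[f32], w: &mut [f32]) {
        for (wi, gi) in w.iter_mut().zip(g) {
            *wi += a * gi;
        }
    }

    fn main() {
        let gradient = vec![0.5, -2.0, 1.0];
        let mut weight = vec![1.0, 1.0, 1.0];
        axpy_inplace(-1.0, &gradient, &mut weight);
        assert_eq!(weight, vec![0.5, 3.0, 0.0]);
    }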
|
@@ -690,6 +689,17 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
         if let Some(gradients) = self.worker.learnable_weights_gradients() { gradients }
         else { self.weights_gradient.clone() }
     }
+
+    /// Returns the learning rate for all the learnable weights in the layer.
+    ///
+    /// If the layer is a container layer it will return all learning rates of the
+    /// layers inside it.
+    pub fn learnable_weights_lr(&self) -> Vec<Option<f32>> {
+        if let Some(lr) = self.worker.learnable_weights_lr() { lr }
+        // else { self.weights_lr.clone() }
+        else {
+            self.learnable_weights_data().iter().map(|_| Some(1f32)).collect::<Vec<_>>() }
+    }
 }
 
 /// A Layer in a [Neural Network][1] that can handle forward and backward of a computation step.
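The new learnable_weights_lr accessor is meant to feed a solver: each entry scales the solver's global learning rate for the corresponding weight, and the Some(1f32) fallback leaves the global rate unchanged. A hedged sketch of how a caller might combine the two (the function name and global_lr parameter are assumptions, not part of this diff):

    // Scale a global learning rate by each weight's local rate,
    // treating a missing (None) entry as a factor of 1.0.
    fn effective_rates(global_lr: f32, local_rates: &[Option<f32>]) -> Vec<f32> {
        local_rates.iter()
                   .map(|lr| global_lr * lr.unwrap_or(1.0))
                   .collect()
    }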
|
@@ -1024,6 +1034,14 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
     fn learnable_weights_gradients(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
         None
     }
+
+    /// Return the learning rates for the learnable weights inside the layer.
+    ///
+    /// This should only be overridden by container layers,
+    /// where the weights are not easily exposable.
+    fn learnable_weights_lr(&self) -> Option<Vec<Option<f32>>> {
+        None
+    }
 }
 
 /// A Layer that can compute the output for a given input.
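The trait's default implementation returns None, so plain layers keep exposing their rates through the fallback in Layer::learnable_weights_lr above; only container layers need to override it. A rough sketch of such an override (the self.layers field and the child layer type are assumptions made for illustration):

    // Hypothetical container override: concatenate the learning rates
    // reported by each child layer, in the order the children are stored.
    fn learnable_weights_lr(&self) -> Option<Vec<Option<f32>>> {
        let mut rates = Vec::new();
        for child in &self.layers {
            rates.extend(child.learnable_weights_lr());
        }
        Some(rates)
    }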
|
@@ -1061,7 +1079,7 @@ pub trait ComputeParametersGradient<T, B: IBackend> {
 
 impl<B: IBackend> fmt::Debug for ILayer<B> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "({}, {})", "foo", "bar")
+        write!(f, "({})", "ILayer")
     }
 }
 
|