@@ -210,8 +210,8 @@ impl<B: IBackend> Layer<B> {
210
210
}
211
211
212
212
let backend: Rc < IBackend < F =B :: F > > = self . backend . clone ( ) ;
213
- blob_data = Arc :: new ( RwLock :: new ( SharedTensor :: new ( backend . device ( ) , & vec ! [ 1 , 1 , 1 ] ) . unwrap ( ) ) ) ; // [1,1,1] for CUDA
214
- blob_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: new ( backend . device ( ) , & vec ! [ 1 , 1 , 1 ] ) . unwrap ( ) ) ) ; // [1,1,1] for CUDA
213
+ blob_data = Arc :: new ( RwLock :: new ( SharedTensor :: new ( & [ 1 , 1 , 1 ] ) ) ) ; // [1,1,1] for CUDA
214
+ blob_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: new ( & [ 1 , 1 , 1 ] ) ) ) ; // [1,1,1] for CUDA
215
215
}
216
216
self . output_blob_names . push ( blob_name. clone ( ) ) ;
217
217
self . output_blobs_data . push ( blob_data. clone ( ) ) ;
@@ -234,8 +234,8 @@ impl<B: IBackend> Layer<B> {
234
234
info ! ( "{} -> {}" , self . name, blob_name) ;
235
235
236
236
let backend: Rc < IBackend < F =B :: F > > = self . backend . clone ( ) ;
237
- let output_data = Arc :: new ( RwLock :: new ( SharedTensor :: new ( backend . device ( ) , & vec ! [ 1 , 1 , 1 ] ) . unwrap ( ) ) ) ; // [1,1,1] for CUDA
238
- let output_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: new ( backend . device ( ) , & vec ! [ 1 , 1 , 1 ] ) . unwrap ( ) ) ) ; // [1,1,1] for CUDA
237
+ let output_data = Arc :: new ( RwLock :: new ( SharedTensor :: new ( & [ 1 , 1 , 1 ] ) ) ) ; // [1,1,1] for CUDA
238
+ let output_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: new ( & [ 1 , 1 , 1 ] ) ) ) ; // [1,1,1] for CUDA
239
239
self . output_blobs_data . push ( output_data) ;
240
240
self . output_blobs_gradient . push ( output_gradient) ;
241
241
}
@@ -264,8 +264,8 @@ impl<B: IBackend> Layer<B> {
264
264
let net_weight_id = weights_len;
265
265
let output_data = self . output_blobs_data [ weight_id] . read ( ) . unwrap ( ) ;
266
266
debug ! ( "Layer {} - creating weight and gradient of size {:?}" , & layer_config. name, output_data. desc( ) ) ;
267
- let weight_data = Arc :: new ( RwLock :: new ( SharedTensor :: < f32 > :: new ( output_data. latest_device ( ) , output_data . desc ( ) ) . unwrap ( ) ) ) ;
268
- let weight_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: < f32 > :: new ( output_data. latest_device ( ) , output_data . desc ( ) ) . unwrap ( ) ) ) ;
267
+ let weight_data = Arc :: new ( RwLock :: new ( SharedTensor :: new ( output_data. desc ( ) ) ) ) ;
268
+ let weight_gradient = Arc :: new ( RwLock :: new ( SharedTensor :: new ( output_data. desc ( ) ) ) ) ;
269
269
self . weights_data . push ( weight_data. clone ( ) ) ;
270
270
self . weights_gradient . push ( weight_gradient. clone ( ) ) ;
271
271
@@ -460,11 +460,6 @@ impl<B: IBackend> Layer<B> {
460
460
self . input_blobs_data [ input_i] . write ( ) . unwrap ( ) . reshape ( & reshaped_shape) . unwrap ( ) ;
461
461
}
462
462
463
- self . worker . sync ( & self . backend ,
464
- & mut self . input_blobs_data , & mut self . input_blobs_gradient ,
465
- & mut self . weights_data , & mut self . weights_gradient ,
466
- & mut self . output_blobs_data , & mut self . output_blobs_gradient ) ;
467
-
468
463
let forward_time = timeit_loops ! ( 1 , {
469
464
if self . is_using_in_place( ) {
470
465
self . worker. forward( & self . backend, & vec![ ] , & self . weights_data, & mut self . output_blobs_data) ;
@@ -497,11 +492,6 @@ impl<B: IBackend> Layer<B> {
497
492
self . output_blobs_gradient [ output_i] = output. clone ( ) ;
498
493
}
499
494
500
- self . worker . sync ( & self . backend ,
501
- & mut self . input_blobs_data , & mut self . input_blobs_gradient ,
502
- & mut self . weights_data , & mut self . weights_gradient ,
503
- & mut self . output_blobs_data , & mut self . output_blobs_gradient ) ;
504
-
505
495
if self . is_using_in_place ( ) {
506
496
self . worker . backward_input ( & self . backend ,
507
497
& self . weights_data ,
@@ -527,11 +517,6 @@ impl<B: IBackend> Layer<B> {
527
517
///
528
518
/// This method is mostly used when doing backpropagation.
529
519
pub fn backward_parameters ( & mut self ) {
530
- self . worker . sync ( & self . backend ,
531
- & mut self . input_blobs_data , & mut self . input_blobs_gradient ,
532
- & mut self . weights_data , & mut self . weights_gradient ,
533
- & mut self . output_blobs_data , & mut self . output_blobs_gradient ) ;
534
-
535
520
self . worker . backward_parameters ( & self . backend ,
536
521
& self . output_blobs_data ,
537
522
& self . output_blobs_gradient ,
@@ -553,13 +538,11 @@ impl<B: IBackend> Layer<B> {
553
538
///
554
539
/// [3]: ../solver/enum.LRPolicy.html
555
540
pub fn update_weights < SolverB : IBackend + :: util:: SolverOps < f32 > > ( & mut self , backend : & SolverB ) {
556
- let mut shared_a = :: util:: native_scalar ( -1f32 ) ;
557
- let _ = shared_a. add_device ( IBackend :: device ( backend) ) ;
558
- shared_a. sync ( IBackend :: device ( backend) ) . unwrap ( ) ;
541
+ // PERF: allocate this scalar once
542
+ let shared_a = :: util:: native_scalar ( -1f32 ) ;
559
543
for ( weight_gradient, weight_data) in self . learnable_weights_gradients ( ) . iter ( ) . zip ( & mut self . learnable_weights_data ( ) ) {
560
- weight_gradient. write ( ) . unwrap ( ) . sync ( IBackend :: device ( backend) ) . unwrap ( ) ;
561
- weight_data. write ( ) . unwrap ( ) . sync ( IBackend :: device ( backend) ) . unwrap ( ) ;
562
- backend. axpy_plain ( & shared_a, & weight_gradient. read ( ) . unwrap ( ) , & mut weight_data. write ( ) . unwrap ( ) ) . unwrap ( ) ;
544
+ backend. axpy ( & shared_a, & weight_gradient. read ( ) . unwrap ( ) ,
545
+ & mut weight_data. write ( ) . unwrap ( ) ) . unwrap ( ) ;
563
546
}
564
547
}
565
548
@@ -695,7 +678,6 @@ impl<B: IBackend> Layer<B> {
695
678
}
696
679
697
680
let mut weight_lock = weight. write ( ) . unwrap ( ) ;
698
- weight_lock. sync ( native_backend. device ( ) ) . unwrap ( ) ;
699
681
700
682
let capnp_tensor = capnp_weight. get_tensor ( ) . unwrap ( ) ;
701
683
let mut shape = Vec :: new ( ) ;
@@ -705,7 +687,7 @@ impl<B: IBackend> Layer<B> {
705
687
}
706
688
weight_lock. reshape ( & shape) . unwrap ( ) ;
707
689
708
- let mut native_slice = weight_lock. get_mut ( native_backend. device ( ) ) . unwrap ( ) . as_mut_native ( ) . unwrap ( ) . as_mut_slice :: < f32 > ( ) ;
690
+ let mut native_slice = weight_lock. write_only ( native_backend. device ( ) ) . unwrap ( ) . as_mut_native ( ) . unwrap ( ) . as_mut_slice :: < f32 > ( ) ;
709
691
let data = capnp_tensor. get_data ( ) . unwrap ( ) ;
710
692
for k in 0 ..data. len ( ) {
711
693
native_slice[ k as usize ] = data. get ( k) ;
@@ -814,8 +796,7 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
814
796
let mut capnp_weight = weights. borrow ( ) . get ( i as u32 ) ;
815
797
capnp_weight. set_name ( name) ;
816
798
817
- let mut weight_lock = weight. write ( ) . unwrap ( ) ;
818
- weight_lock. sync ( native_backend. device ( ) ) . unwrap ( ) ;
799
+ let weight_lock = weight. write ( ) . unwrap ( ) ;
819
800
820
801
let mut tensor = capnp_weight. init_tensor ( ) ;
821
802
{
@@ -825,7 +806,8 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
825
806
}
826
807
}
827
808
{
828
- let native_slice = weight_lock. get ( native_backend. device ( ) ) . unwrap ( ) . as_native ( ) . unwrap ( ) . as_slice :: < f32 > ( ) ;
809
+ let native_slice = weight_lock. read ( native_backend. device ( ) )
810
+ . unwrap ( ) . as_native ( ) . unwrap ( ) . as_slice :: < f32 > ( ) ;
829
811
let mut tensor_data = tensor. borrow ( ) . init_data ( native_slice. len ( ) as u32 ) ;
830
812
for ( i, datum) in native_slice. iter ( ) . enumerate ( ) {
831
813
tensor_data. set ( i as u32 , * datum) ;
@@ -1025,74 +1007,6 @@ pub trait ILayer<B: IBackend> : ComputeOutput<f32, B> + ComputeInputGradient<f32
1025
1007
self . compute_parameters_gradient ( backend, & output_data_, & output_gradients_, & input_data_, & mut weights_gradients_) ;
1026
1008
}
1027
1009
1028
- /// Synchronize the blobs before doing a forward or backward operation.
1029
- ///
1030
- /// This is necessary because the forward_layer and backward_layer methods only immutably
1031
- /// borrow the corresponding input blobs and weights which they are not supposed to change.
1032
- /// However, synchronizing all blobs to the same device may be necessary for some computations,
1033
- /// which can only be done with a mutable borrow.
1034
- fn sync ( & self ,
1035
- backend : & B ,
1036
- input_data : & mut [ ArcLock < SharedTensor < f32 > > ] ,
1037
- input_gradients : & mut [ ArcLock < SharedTensor < f32 > > ] ,
1038
- weights_data : & mut [ ArcLock < SharedTensor < f32 > > ] ,
1039
- weights_gradients : & mut [ ArcLock < SharedTensor < f32 > > ] ,
1040
- output_data : & mut Vec < ArcLock < SharedTensor < f32 > > > ,
1041
- output_gradients : & mut Vec < ArcLock < SharedTensor < f32 > > > ) {
1042
- if self . sync_native ( ) {
1043
- let backend = native_backend ( ) ;
1044
- for tensor in input_data {
1045
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1046
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1047
- }
1048
- for tensor in input_gradients {
1049
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1050
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1051
- }
1052
- for tensor in weights_data {
1053
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1054
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1055
- }
1056
- for tensor in weights_gradients {
1057
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1058
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1059
- }
1060
- for tensor in output_data {
1061
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1062
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1063
- }
1064
- for tensor in output_gradients {
1065
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1066
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1067
- }
1068
- } else {
1069
- for tensor in input_data {
1070
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1071
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1072
- }
1073
- for tensor in input_gradients {
1074
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1075
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1076
- }
1077
- for tensor in weights_data {
1078
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1079
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1080
- }
1081
- for tensor in weights_gradients {
1082
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1083
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1084
- }
1085
- for tensor in output_data {
1086
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1087
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1088
- }
1089
- for tensor in output_gradients {
1090
- let mut sync = tensor. write ( ) . unwrap ( ) ;
1091
- match sync. add_device ( backend. device ( ) ) { _ => sync. sync ( backend. device ( ) ) . unwrap ( ) }
1092
- }
1093
- }
1094
- }
1095
-
1096
1010
/// Return whether "anonymous" output blobs are created automatically for the layer.
1097
1011
///
1098
1012
/// If this method returns true, Network::init will create enough "anonymous" output
0 commit comments