Commit a8345ee

feat/convolution: remove convolution axis
1 parent 8e3e662 commit a8345ee

4 files changed: +45 -70 lines changed

benches/network_benches.rs

+10 -20
@@ -136,8 +136,7 @@ mod cuda {
 num_output: 64,
 filter_shape: vec![11],
 padding: vec![2],
-stride: vec![4],
-axis: None
+stride: vec![4]
 };
 let mut conv1_cfg = LayerConfig::new("conv1", LayerType::Convolution(conv1_layer_cfg));
 conv1_cfg.add_input("data");
@@ -164,8 +163,7 @@ mod cuda {
 num_output: 192,
 filter_shape: vec![5],
 padding: vec![2],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv2_cfg = LayerConfig::new("conv2", LayerType::Convolution(conv2_layer_cfg));
 conv2_cfg.add_input("pool1_out");
@@ -192,8 +190,7 @@ mod cuda {
 num_output: 384,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv3_cfg = LayerConfig::new("conv3", LayerType::Convolution(conv3_layer_cfg));
 conv3_cfg.add_input("pool2_out");
@@ -209,8 +206,7 @@ mod cuda {
 num_output: 256,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv4_cfg = LayerConfig::new("conv4", LayerType::Convolution(conv4_layer_cfg));
 conv4_cfg.add_input("conv3_out");
@@ -226,8 +222,7 @@ mod cuda {
 num_output: 256,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv5_cfg = LayerConfig::new("conv5", LayerType::Convolution(conv5_layer_cfg));
 conv5_cfg.add_input("conv4_out");
@@ -298,8 +293,7 @@ mod cuda {
 num_output: 32,
 filter_shape: vec![11],
 padding: vec![2],
-stride: vec![4],
-axis: None
+stride: vec![4]
 };
 let mut conv1_cfg = LayerConfig::new("conv1", LayerType::Convolution(conv1_layer_cfg));
 conv1_cfg.add_input("data");
@@ -326,8 +320,7 @@ mod cuda {
 num_output: 96,
 filter_shape: vec![5],
 padding: vec![2],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv2_cfg = LayerConfig::new("conv2", LayerType::Convolution(conv2_layer_cfg));
 conv2_cfg.add_input("pool1_out");
@@ -354,8 +347,7 @@ mod cuda {
 num_output: 142,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv3_cfg = LayerConfig::new("conv3", LayerType::Convolution(conv3_layer_cfg));
 conv3_cfg.add_input("pool2_out");
@@ -371,8 +363,7 @@ mod cuda {
 num_output: 128,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv4_cfg = LayerConfig::new("conv4", LayerType::Convolution(conv4_layer_cfg));
 conv4_cfg.add_input("conv3_out");
@@ -388,8 +379,7 @@ mod cuda {
 num_output: 128,
 filter_shape: vec![3],
 padding: vec![1],
-stride: vec![1],
-axis: None
+stride: vec![1]
 };
 let mut conv5_cfg = LayerConfig::new("conv5", LayerType::Convolution(conv5_layer_cfg));
 conv5_cfg.add_input("conv4_out");
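For orientation, here is a minimal sketch of how a convolution layer is configured after this commit: the `axis` field is gone, and the channel count is taken implicitly from dimension 1 of the 4D NCHW input. Only the type and method names below appear in the diffs of this commit; the commented `use` paths are assumptions, so treat this as an illustration rather than a drop-in snippet.

// Illustration only -- the `use` paths are assumptions; the type and method
// names are taken from the diffs in this commit.
// use leaf::layer::{LayerConfig, LayerType};
// use leaf::layers::common::convolution::ConvolutionConfig;

fn conv1_config() -> LayerConfig {
    // ConvolutionConfig now has exactly four fields; `axis` no longer exists.
    let conv1_layer_cfg = ConvolutionConfig {
        num_output: 64,          // number of output feature maps
        filter_shape: vec![11],  // kernel size (the benchmarks pass a single entry for a square kernel)
        padding: vec![2],
        stride: vec![4],
    };
    // The channel axis is implicitly dimension 1 of the NCHW input.
    LayerConfig::new("conv1", LayerType::Convolution(conv1_layer_cfg))
}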

examples/benchmarks.rs

+18 -18
@@ -119,27 +119,27 @@ fn bench_alexnet() {
 let mut cfg = SequentialConfig::default();
 cfg.add_input("data", &vec![128, 3, 224, 224]);

-let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![11], padding: vec![2], stride: vec![4], axis: None };
+let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![11], padding: vec![2], stride: vec![4] };
 cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
 let pool1_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![3], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool1", pool1_layer_cfg));

-let conv2_layer_cfg = ConvolutionConfig { num_output: 192, filter_shape: vec![5], padding: vec![2], stride: vec![1], axis: None };
+let conv2_layer_cfg = ConvolutionConfig { num_output: 192, filter_shape: vec![5], padding: vec![2], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv2", conv2_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
 let pool2_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![3], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool2", pool2_layer_cfg));

-let conv3_layer_cfg = ConvolutionConfig { num_output: 384, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv3_layer_cfg = ConvolutionConfig { num_output: 384, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv3", conv3_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));

-let conv4_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv4_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv4", conv4_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));

-let conv5_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv5_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv5", conv5_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));
 let pool3_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![3], stride: vec![2], padding: vec![0] };
@@ -201,27 +201,27 @@ fn bench_overfeat() {
 let mut cfg = SequentialConfig::default();
 cfg.add_input("data", &vec![128, 3, 231, 231]);

-let conv1_layer_cfg = ConvolutionConfig { num_output: 96, filter_shape: vec![11], padding: vec![0], stride: vec![4], axis: None };
+let conv1_layer_cfg = ConvolutionConfig { num_output: 96, filter_shape: vec![11], padding: vec![0], stride: vec![4] };
 cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
 let pool1_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool1", pool1_layer_cfg));

-let conv2_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![5], padding: vec![0], stride: vec![1], axis: None };
+let conv2_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![5], padding: vec![0], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv2", conv2_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
 let pool2_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool2", pool2_layer_cfg));

-let conv3_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv3_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv3", conv3_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));

-let conv4_layer_cfg = ConvolutionConfig { num_output: 1024, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv4_layer_cfg = ConvolutionConfig { num_output: 1024, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv4", conv4_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));

-let conv5_layer_cfg = ConvolutionConfig { num_output: 1024, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv5_layer_cfg = ConvolutionConfig { num_output: 1024, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv5", conv5_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));
 let pool5_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
@@ -283,43 +283,43 @@ fn bench_vgg_a() {
 let mut cfg = SequentialConfig::default();
 cfg.add_input("data", &vec![64, 3, 224, 224]);

-let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv1_layer_cfg = ConvolutionConfig { num_output: 64, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
 let pool1_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool1", pool1_layer_cfg));

-let conv2_layer_cfg = ConvolutionConfig { num_output: 128, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv2_layer_cfg = ConvolutionConfig { num_output: 128, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv2", conv2_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
 let pool2_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool2", pool2_layer_cfg));

-let conv3_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv3_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv3", conv3_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));

-let conv4_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv4_layer_cfg = ConvolutionConfig { num_output: 256, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv4", conv4_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));
 let pool3_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool3", pool3_layer_cfg));

-let conv5_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv5_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv5", conv5_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));

-let conv6_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv6_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv6", conv6_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv6/relu", LayerType::ReLU));
 let pool4_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };
 cfg.add_layer(LayerConfig::new("pool4", pool4_layer_cfg));

-let conv7_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv7_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv7", conv7_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv7/relu", LayerType::ReLU));

-let conv8_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1], axis: None };
+let conv8_layer_cfg = ConvolutionConfig { num_output: 512, filter_shape: vec![3], padding: vec![1], stride: vec![1] };
 cfg.add_layer(LayerConfig::new("conv8", conv8_layer_cfg));
 cfg.add_layer(LayerConfig::new("conv8/relu", LayerType::ReLU));
 let pool5_layer_cfg = PoolingConfig { mode: PoolingMode::Max, filter_shape: vec![2], stride: vec![2], padding: vec![0] };

src/layers/common/convolution.rs

+16 -31
@@ -1,7 +1,16 @@
 //! Convolves the input tensor.
 //!
-//! Does this convolution with a set of learnable filters, each producing one
-//! feature map in the output tensor.
+//! Computes this convolution with a set of learnable filters,
+//! each producing one feature map in the output tensor.
+//!
+//! [This site][cs231n_convnets] provides a good overview of the functionality
+//! of convolutional layers.
+//!
+//! ## Input Data
+//!
+//! The layer expects the input to be in 4D NCHW format (2 spatial dimensions).
+//!
+//! [cs231n_convnets]: https://cs231n.github.io/convolutional-networks
 use std::rc::Rc;
 use std::sync::{Arc, RwLock};
 use co::prelude::*;
@@ -15,7 +24,6 @@ use super::FilterLayer;
 #[derive(Debug, Clone)]
 /// Convolution Layer
 pub struct Convolution<B: conn::Convolution<f32>> {
-axis: usize,
 num_output: usize,
 filter_shape: Vec<usize>,
 stride: Vec<usize>,
@@ -35,8 +43,6 @@ impl<B: conn::Convolution<f32>> Convolution<B> {
 stride: config.stride.clone(),
 padding: config.padding.clone(),

-axis: config.axis(),
-
 workspace: None,
 convolution_config: None,
 }
@@ -46,7 +52,7 @@ impl<B: conn::Convolution<f32>> Convolution<B> {
 let num_spatial_dims = self.num_spatial_dims(input_shape);
 let spatial_dims = self.spatial_filter_dims(num_spatial_dims);
 let filter_n = self.num_output; // number of output feature maps
-let filter_c = input_shape[self.axis]; // number of input feature maps
+let filter_c = input_shape[1]; // number of input feature maps
 let filter_h = spatial_dims[0];
 let filter_w = spatial_dims[1];

@@ -61,7 +67,7 @@ impl<B: conn::Convolution<f32>> Convolution<B> {
 }

 impl<B: conn::Convolution<f32>> FilterLayer for Convolution<B> {
-/// Calculates the number of spatial dimensions for the pooling operation.
+/// Calculates the number of spatial dimensions for the convolution operation.
 fn num_spatial_dims(&self, input_shape: &[usize]) -> usize {
 match input_shape.len() {
 4 => 2,
@@ -75,11 +81,11 @@ impl<B: conn::Convolution<f32>> FilterLayer for Convolution<B> {
 let padding = self.padding_dims(num_spatial_dims);
 let stride = self.stride_dims(num_spatial_dims);
 let mut output_shape = Vec::new();
-for dim in &input_shape[0..self.axis].to_vec() {
+for dim in &input_shape[0..1].to_vec() {
 output_shape.push(*dim);
 }
 output_shape.push(self.num_output);
-for spatial_dim in Self::calculate_spatial_output_dims(&input_shape[(self.axis + 1)..], &filter, &padding, &stride) {
+for spatial_dim in Self::calculate_spatial_output_dims(&input_shape[2..], &filter, &padding, &stride) {
 output_shape.push(spatial_dim);
 }

@@ -213,33 +219,14 @@ impl<B: IBackend + conn::Convolution<f32>> ComputeParametersGradient<f32, B> for
 #[derive(Debug, Clone)]
 /// Specifies configuration parameters for a Convolution Layer.
 pub struct ConvolutionConfig {
-/// The number of output values
+/// The number of output feature maps
 pub num_output: usize,
 /// The size of the kernel
 pub filter_shape: Vec<usize>,
 /// The stride size
 pub stride: Vec<usize>,
 /// The padding size
 pub padding: Vec<usize>,
-/// The axis to interpret as "channels" when performing convolution.
-///
-/// Preceding dimensions are treated as independent inputs, and
-/// succeeding dimensions are treated as "spatial".
-///
-/// Defaults to `1`
-pub axis: Option<usize>,
-}
-
-impl ConvolutionConfig {
-/// The axis to interpret as "channels" when performing convolution.
-///
-/// Preceding dimensions are treated as independent inputs, and
-/// succeeding dimensions are treated as "spatial".
-///
-/// Defaults to `1`
-pub fn axis(&self) -> usize {
-self.axis.unwrap_or(1)
-}
 }

 impl Into<LayerType> for ConvolutionConfig {
@@ -263,8 +250,6 @@ mod tests {
 filter_shape: vec![11],
 padding: vec![2],
 stride: vec![4],
-
-axis: None,
 };
 let layer = Convolution::<Backend<Cuda>>::from_config(&cfg);
 let num_spatial_dims = layer.num_spatial_dims(&vec![1, 3, 224, 224]);
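With `axis` removed, `build_output_shape` now hard-codes the NCHW interpretation: dimension 0 (batch) is copied through, dimension 1 (channels) is replaced by `num_output`, and dimensions 2.. are treated as spatial. The sketch below mirrors that logic using the conventional output-size formula `(in + 2*pad - filter) / stride + 1`; the formula is an assumption here, since `calculate_spatial_output_dims` itself is not part of this diff.

// Illustration of the NCHW output-shape logic -- not the crate's actual
// implementation; the size formula is the conventional one (assumed).
fn output_shape_nchw(
    input_shape: &[usize], // [N, C, H, W]
    num_output: usize,
    filter: &[usize],
    padding: &[usize],
    stride: &[usize],
) -> Vec<usize> {
    // Batch dimension is copied, the channel dimension becomes `num_output`.
    let mut out = vec![input_shape[0], num_output];
    // Remaining dimensions are spatial.
    for (i, &dim) in input_shape[2..].iter().enumerate() {
        out.push((dim + 2 * padding[i] - filter[i]) / stride[i] + 1);
    }
    out
}

fn main() {
    // AlexNet conv1 from examples/benchmarks.rs:
    // (224 + 2*2 - 11) / 4 + 1 = 55 in each spatial dimension.
    let out = output_shape_nchw(&[128, 3, 224, 224], 64, &[11, 11], &[2, 2], &[4, 4]);
    assert_eq!(out, vec![128, 64, 55, 55]);
}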

src/layers/common/linear.rs

+1 -1
@@ -7,7 +7,7 @@
 //! - `x`: input value
 //! - `b`: bias (not implemented yet)
 //!
-//! ## Input
+//! ## Input Data
 //!
 //! The input can either have one or two dimensions:
 //!
