
Commit 6f41247

Auto merge of #82 - autumnai:feat/new_features, r=hobofan
change meaning of framework features

Changes the default feature flags to only build in support for the Native backend, since that is what most people will have available on their development machines. It also changes the meaning of the framework feature flags (`native`, `cuda`, `opencl`), so that only the capabilities that are shared between the enabled frameworks are included in the compiled version. See #81 for a possible long-term solution.

Example:

- feature flags are `native cuda` -> the `Convolution` layer **is not available**, since the native backend does not provide the required traits, not even for the CUDA backend.
- feature flags are `cuda` -> the `Convolution` layer **is available**, since the CUDA backend provides the required traits and there is no native backend it has to be compatible with.
- feature flags are `native` -> the `Convolution` layer **is not available**, since the native backend does not provide the required traits and there are no other frameworks present.

WIP: I still need to finish a top-level FEATURE-FLAGS guide that explains this a bit more in depth.
2 parents c36149e + 642a4d4 commit 6f41247
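In practice, the new defaults boil down to two build invocations; a minimal illustration based on the build commands documented in the FEATURE-FLAGS.md added by this commit:

```sh
# default build: only the native backend, restricted to the shared capability set
# (no Convolution/Pooling layers)
cargo build

# CUDA-only build: drop the native default to get the full CUDA capability set
cargo build --no-default-features --features cuda
```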

File tree

11 files changed (+293, -32 lines)

Cargo.toml (+4, -4)

@@ -14,8 +14,8 @@ keywords = ["deep-learning", "neural-networks", "machine-learning", "framework"]
 license = "MIT OR Apache-2.0"

 [dependencies]
-collenchyma = { version = "0.0.8", default-features = false }
-collenchyma-blas = { version = "0.2.0", default-features = false }
+collenchyma = { version = "0.0.8", default-features = false, features = ["native"] } # native feature to read/write data into tensors
+collenchyma-blas = { version = "0.2.0", default-features = false, features = ["native"] } # only compiles with native feature
 collenchyma-nn = { version = "0.3.2", default-features = false }

 log = "0.3.2"
@@ -30,8 +30,8 @@ timeit = "0.1.2"
 env_logger = "0.3"

 [features]
-default = ["native", "cuda", "opencl"]
-native = ["collenchyma/native", "collenchyma-blas/native", "collenchyma-nn/native"]
+default = ["native"]
+native = ["collenchyma-blas/native", "collenchyma-nn/native"]
 cuda = ["collenchyma/cuda", "collenchyma-blas/cuda", "collenchyma-nn/cuda"]
 opencl = ["collenchyma/opencl", "collenchyma-blas/opencl", "collenchyma-nn/opencl"]

FEATURE-FLAGS.md (new file, +84)

# Feature flags in Leaf

## The problem(s)

Supporting different backends is an important concept in Leaf.

Optimally, we would like to always have the choice of running Leaf on all backends.
However, in reality there are some tradeoffs that have to be made.

One problem is that certain backends require special hardware to run (CUDA needs
NVIDIA GPUs), or the libraries needed to address them, which are required for
compilation, are not present on the developer's machine.

Another challenge is that not all backends support the same operations, which
constrains neural networks with special requirements to the backends that provide
those operations. Due to some limitations in the current version of Rust (1.7),
allowing differently featured backends cannot easily be supported.
See [Issue #81](https://github.com/autumnai/leaf/issues/81).

## The solution

Feature flags are a well-known concept for adding opt-in functionality that is
not necessary for every use case of a library, and they are a good solution to the
first problem.
Luckily, Cargo, Rust's package manager, has built-in support for feature flags.

A simple dependency with additional features enabled in a `Cargo.toml` looks like this:
```toml
[dependencies]
leaf = { version = "0.2.0", features = ["cuda"] }
```

Feature flags are usually used in an additive way, but **some configurations of
features for Leaf might actually take away some functionality**.
We do this because we want models to be portable across different backends, which
is not possible if, e.g., the CUDA backend supports Convolution layers while the
Native backend doesn't. To make portability possible, we deactivate those features
that are only available on a single backend, effectively "dumbing down" the backends.

Example:
- feature flags are `cuda` -> the `Convolution` layer **is available**, since the CUDA backend provides the required traits and there is no native backend it has to be compatible with.
- feature flags are `native` -> the `Convolution` layer **is not available**, since the native backend does not provide the required traits and there are no other frameworks present.
- feature flags are `native cuda` -> the `Convolution` layer **is not available**, since the native backend does not provide the required traits, and the CUDA backend has been dumbed down.

## Using the feature flags

One thing we have ignored until now is default feature flags. Cargo allows defining
a set of features that are included in a package by default.
One of the default feature flags of Leaf is the `native` flag. Looking at the
example above, you might notice that the only way to unleash the full power of the
CUDA backend is to deactivate the default `native` flag.
Cargo allows us to do that either via `--no-default-features` on the CLI or by
specifying `default-features = false` for a dependency in `Cargo.toml`.

#### In your project

The simple `Cargo.toml` example above works in simple cases, but if you want to
provide the same flexibility of backends in your project, you can reexport the
feature flags.

A typical example (including collenchyma) would look like this:
```toml
[dependencies]
leaf = { version = "0.2.0", default-features = false }
# the native collenchyma feature is necessary to read/write tensors
collenchyma = { version = "0.0.8", default-features = false, features = ["native"] }

[features]
default = ["native"]
native = ["leaf/native"]
opencl = ["leaf/opencl", "collenchyma/opencl"]
cuda = ["leaf/cuda", "collenchyma/cuda"]
```

Building your project would then look like this:
```sh
# having both native and CUDA backends:
# `native` is provided by default, and `cuda` is explicitly enabled by `--features cuda`
cargo build --features cuda
# unleashing CUDA:
# the `native` default is excluded by `--no-default-features`, and `cuda` is explicitly enabled by `--features cuda`
cargo build --no-default-features --features cuda
```
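Not part of the commit, but as an illustration: a downstream crate that re-exports the flags as in the `Cargo.toml` sketch above can branch on them at compile time with the standard `cfg!` macro. A minimal, hypothetical example:

```rust
fn main() {
    // `cfg!` expands to a compile-time boolean reflecting this crate's own
    // enabled features (the re-exported `native`/`cuda` flags from above).
    if cfg!(all(feature = "cuda", not(feature = "native"))) {
        println!("full CUDA build: CUDA-only layers such as Convolution are compiled in");
    } else if cfg!(feature = "native") {
        println!("shared-capability build: CUDA-only layers are compiled out");
    } else {
        println!("no backend feature enabled");
    }
}
```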

README.md (+3, -1)

@@ -86,6 +86,8 @@ cuda = ["leaf/cuda"]
 opencl = ["leaf/opencl"]
 ```

+> More information on the use of feature flags in Leaf can be found in [FEATURE-FLAGS.md](./FEATURE-FLAGS.md)
+

 ## Examples

@@ -100,7 +102,7 @@ the install guide, clone this repoistory and then run

 ```bash
 # The examples currently require CUDA support.
-cargo run --release --example benchmarks
+cargo run --release --no-default-features --features cuda --example benchmarks alexnet
 ```

 [leaf-examples]: https://github.com/autumnai/leaf-examples

examples/benchmarks.rs (+9, -9)

@@ -112,12 +112,12 @@ fn get_time_scale<'a>(sec: f64) -> (f64, &'a str) {
     }
 }

-#[cfg(not(feature = "cuda"))]
+#[cfg(feature="native")]
 fn bench_alexnet() {
     println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Collenchyma NN Plugin.");
-    println!("Try compiling with the \"cuda\" feature flag.");
+    println!("Try running with `cargo run --release --no-default-features --features cuda --example benchmarks alexnet`.");
 }
-#[cfg(feature = "cuda")]
+#[cfg(all(feature="cuda", not(feature="native")))]
 fn bench_alexnet() {
     let mut cfg = SequentialConfig::default();
     cfg.add_input("data", &vec![128, 3, 224, 224]);
@@ -194,12 +194,12 @@ fn bench_alexnet() {
     }
 }

-#[cfg(not(feature = "cuda"))]
+#[cfg(feature="native")]
 fn bench_overfeat() {
     println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Collenchyma NN Plugin.");
-    println!("Try compiling with the \"cuda\" feature flag.");
+    println!("Try running with `cargo run --release --no-default-features --features cuda --example benchmarks overfeat`.");
 }
-#[cfg(feature = "cuda")]
+#[cfg(all(feature="cuda", not(feature="native")))]
 fn bench_overfeat() {
     let mut cfg = SequentialConfig::default();
     cfg.add_input("data", &vec![128, 3, 231, 231]);
@@ -276,12 +276,12 @@ fn bench_overfeat() {
     }
 }

-#[cfg(not(feature = "cuda"))]
+#[cfg(feature="native")]
 fn bench_vgg_a() {
     println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Collenchyma NN Plugin.");
-    println!("Try compiling with the \"cuda\" feature flag.");
+    println!("Try running with `cargo run --release --no-default-features --features cuda --example benchmarks vgg`.");
 }
-#[cfg(feature = "cuda")]
+#[cfg(all(feature="cuda", not(feature="native")))]
 fn bench_vgg_a() {
     let mut cfg = SequentialConfig::default();
     cfg.add_input("data", &vec![64, 3, 224, 224]);

src/layer.rs (+12)

@@ -687,9 +687,11 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
     /// [3]: ../layers/index.html
     fn worker_from_config(backend: Rc<B>, config: &LayerConfig) -> Box<ILayer<B>> {
         match config.layer_type.clone() {
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::Convolution(layer_config) => Box::new(Convolution::from_config(&layer_config)),
             LayerType::Linear(layer_config) => Box::new(Linear::from_config(&layer_config)),
             LayerType::LogSoftmax => Box::new(LogSoftmax::default()),
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::Pooling(layer_config) => Box::new(Pooling::from_config(&layer_config)),
             LayerType::Sequential(layer_config) => Box::new(Sequential::from_config(backend, &layer_config)),
             LayerType::Softmax => Box::new(Softmax::default()),
@@ -1103,12 +1105,14 @@ pub struct LayerConfig {
 pub enum LayerType {
     // Common layers
     /// Convolution Layer
+    #[cfg(all(feature="cuda", not(feature="native")))]
     Convolution(ConvolutionConfig),
     /// Linear Layer
     Linear(LinearConfig),
     /// LogSoftmax Layer
     LogSoftmax,
     /// Pooling Layer
+    #[cfg(all(feature="cuda", not(feature="native")))]
     Pooling(PoolingConfig),
     /// Sequential Layer
     Sequential(SequentialConfig),
@@ -1131,14 +1135,22 @@ impl LayerType {
     /// Returns wether the LayerType supports in-place operations.
     pub fn supports_in_place(&self) -> bool {
         match *self {
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::Convolution(_) => false,
             LayerType::Linear(_) => false,
             LayerType::LogSoftmax => false,
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::Pooling(_) => false,
             LayerType::Sequential(_) => false,
             LayerType::Softmax => false,
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::ReLU => true,
+            #[cfg(feature="native")]
+            LayerType::ReLU => false,
+            #[cfg(all(feature="cuda", not(feature="native")))]
             LayerType::Sigmoid => true,
+            #[cfg(feature="native")]
+            LayerType::Sigmoid => false,
             LayerType::NegativeLogLikelihood(_) => false,
             LayerType::Reshape(_) => true,
         }
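The gating has to be applied to the enum variant and to every match arm that uses it in lock-step, as the hunks above do for `Convolution` and `Pooling`; otherwise a build without the variant would fail on the arm. A stripped-down sketch with hypothetical names (not the repository code):

```rust
enum LayerType {
    Linear,
    // This variant only exists in CUDA-only builds.
    #[cfg(all(feature = "cuda", not(feature = "native")))]
    Convolution,
}

fn supports_in_place(layer: &LayerType) -> bool {
    match *layer {
        LayerType::Linear => false,
        // The arm carries the same cfg, so the match stays exhaustive
        // in every feature combination.
        #[cfg(all(feature = "cuda", not(feature = "native")))]
        LayerType::Convolution => false,
    }
}

fn main() {
    println!("in-place: {}", supports_in_place(&LayerType::Linear));
}
```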

src/layers/activation/relu.rs (+70, -1)

@@ -7,7 +7,9 @@
 //! needed in a Sigmoid layer.

 use co::{IBackend,SharedTensor};
-use conn::{Relu, ReluPointwise};
+use conn::Relu;
+#[cfg(all(feature="cuda", not(feature="native")))]
+use conn::ReluPointwise;
 use layer::*;
 use util::ArcLock;

@@ -16,6 +18,11 @@ use util::ArcLock;
 /// ReLU Activation Layer
 pub struct ReLU;

+//
+// ReLU + ReLUPointwise
+// Only on CUDA
+//
+#[cfg(all(feature="cuda", not(feature="native")))]
 impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ILayer<B> for ReLU {
     impl_ilayer_activation!();

@@ -41,6 +48,7 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ILayer<B> for ReLU {
     }
 }

+#[cfg(all(feature="cuda", not(feature="native")))]
 impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeOutput<f32, B> for ReLU {
     fn compute_output(&self,
                       backend: &B,
@@ -54,6 +62,7 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeOutput<f32, B> for ReL
     }
 }

+#[cfg(all(feature="cuda", not(feature="native")))]
 impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeInputGradient<f32, B> for ReLU {
     fn compute_input_gradient(&self,
                               backend: &B,
@@ -69,4 +78,64 @@ impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeInputGradient<f32, B>
     }
 }

+#[cfg(all(feature="cuda", not(feature="native")))]
 impl<B: IBackend + Relu<f32> + ReluPointwise<f32>> ComputeParametersGradient<f32, B> for ReLU {}
+
+//
+// ReLU without ReLUPointwise
+// Compiled whenever the native feature is enabled
+//
+#[cfg(feature="native")]
+impl<B: IBackend + Relu<f32>> ILayer<B> for ReLU {
+    impl_ilayer_activation!();
+
+    fn reshape(&mut self,
+               backend: ::std::rc::Rc<B>,
+               input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
+               output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
+        if let Some(inp) = input_data.get(0) {
+            let read_inp = inp.read().unwrap();
+            let input_desc = read_inp.desc();
+            input_gradient[0].write().unwrap().resize(input_desc).unwrap();
+            output_data[0].write().unwrap().resize(input_desc).unwrap();
+            output_gradient[0].write().unwrap().resize(input_desc).unwrap();
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + Relu<f32>> ComputeOutput<f32, B> for ReLU {
+    fn compute_output(&self,
+                      backend: &B,
+                      _weights: &[&SharedTensor<f32>],
+                      input_data: &[&SharedTensor<f32>],
+                      output_data: &mut [&mut SharedTensor<f32>]) {
+        match input_data.get(0) {
+            Some(input) => backend.relu_plain(input, output_data[0]).unwrap(),
+            None => panic!("No input provided for ReLU layer."),
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + Relu<f32>> ComputeInputGradient<f32, B> for ReLU {
+    fn compute_input_gradient(&self,
+                              backend: &B,
+                              weights_data: &[&SharedTensor<f32>],
+                              output_data: &[&SharedTensor<f32>],
+                              output_gradients: &[&SharedTensor<f32>],
+                              input_data: &[&SharedTensor<f32>],
+                              input_gradients: &mut [&mut SharedTensor<f32>]) {
+        match output_data.get(0) {
+            Some(_) => backend.relu_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
+            None => panic!("No output_data provided for ReLU layer backward."),
+        }
+    }
+}
+
+#[cfg(feature="native")]
+impl<B: IBackend + Relu<f32>> ComputeParametersGradient<f32, B> for ReLU {}
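The pattern in this file is feature-selected trait bounds: the CUDA-only build may require the pointwise (in-place) ReLU trait, while any build that includes the native backend may only rely on the plain ReLU trait shared by all backends. A self-contained sketch with stand-in traits (the real bounds come from the collenchyma-nn plugin traits), assuming at least one of the two features is enabled:

```rust
// Stand-ins for the collenchyma-nn `Relu` / `ReluPointwise` plugin traits.
trait Relu {
    fn relu(&self);
}
trait ReluPointwise {
    fn relu_pointwise(&self);
}

struct ReLU;

// CUDA-only build: the layer may demand the pointwise (in-place) variant.
#[cfg(all(feature = "cuda", not(feature = "native")))]
impl ReLU {
    fn forward<B: Relu + ReluPointwise>(&self, backend: &B) {
        backend.relu_pointwise();
    }
}

// Build that includes the native backend: only the shared capability is required.
#[cfg(feature = "native")]
impl ReLU {
    fn forward<B: Relu>(&self, backend: &B) {
        backend.relu();
    }
}

// Dummy backend offering both capabilities, so the sketch compiles either way.
struct DummyBackend;
impl Relu for DummyBackend {
    fn relu(&self) { println!("plain relu"); }
}
impl ReluPointwise for DummyBackend {
    fn relu_pointwise(&self) { println!("pointwise relu"); }
}

fn main() {
    ReLU.forward(&DummyBackend);
}
```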
