@@ -56,17 +56,23 @@ class bidirectional_layer : public layer
         forward_state_h_(stateful ? tensor(tensor_shape(n_units), static_cast<float_type>(0)) : fplus::nothing<tensor>()),
         forward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast<float_type>(0)) : fplus::nothing<tensor>()),
         backward_state_h_(stateful ? tensor(tensor_shape(n_units), static_cast<float_type>(0)) : fplus::nothing<tensor>()),
-        backward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast<float_type>(0)) : fplus::nothing<tensor>())
+        backward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast<float_type>(0)) : fplus::nothing<tensor>()),
+        use_avail_input_state_for_stateful_(true)
+
     {
     }

     void reset_states() override
     {
+        // TF 2.1 bug: reset_states() does nothing in TF 2.1.
+        // The implementation below is how TF 2.1 should behave.
+        // To match TF 2.1, just comment out the code below.
         if (is_stateful()) {
             forward_state_h_ = tensor(tensor_shape(n_units_), static_cast<float_type>(0));
             forward_state_c_ = tensor(tensor_shape(n_units_), static_cast<float_type>(0));
             backward_state_h_ = tensor(tensor_shape(n_units_), static_cast<float_type>(0));
             backward_state_c_ = tensor(tensor_shape(n_units_), static_cast<float_type>(0));
+            use_avail_input_state_for_stateful_ = true;
         }
     }
@@ -110,29 +116,26 @@ class bidirectional_layer : public layer
             assertion(inputs.size() == 1 || inputs.size() == 5,
                 "Invalid number of input tensors.");

-            tensor forward_state_h = inputs.size() == 5
-                ? inputs[1]
-                : is_stateful()
-                    ? forward_state_h_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
-
-            tensor forward_state_c = inputs.size() == 5
-                ? inputs[2]
-                : is_stateful()
-                    ? forward_state_c_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
-
-            tensor backward_state_h = inputs.size() == 5
-                ? inputs[3]
-                : is_stateful()
-                    ? backward_state_h_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
-
-            tensor backward_state_c = inputs.size() == 5
-                ? inputs[4]
-                : is_stateful()
-                    ? backward_state_c_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
+            bool initial_state_provided = inputs.size() == 5;
+            bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_;
+            bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state;
+            // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state;
+
+            tensor forward_state_h = use_input_initial_state ? inputs[1] :
+                use_last_state_for_initial_state ? forward_state_h_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state
+
+            tensor forward_state_c = use_input_initial_state ? inputs[2] :
+                use_last_state_for_initial_state ? forward_state_c_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state
+
+            tensor backward_state_h = use_input_initial_state ? inputs[3] :
+                use_last_state_for_initial_state ? backward_state_h_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state
+
+            tensor backward_state_c = use_input_initial_state ? inputs[4] :
+                use_last_state_for_initial_state ? backward_state_c_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state

             result_forward = lstm_impl(input, forward_state_h, forward_state_c,
                 n_units_, use_bias_, return_sequences_, stateful_,
@@ -147,24 +150,26 @@ class bidirectional_layer : public layer
                 forward_state_c_ = forward_state_c;
                 backward_state_h_ = backward_state_h;
                 backward_state_c_ = backward_state_c;
+                use_avail_input_state_for_stateful_ = false;
             }
         }
         else if (wrapped_layer_type_ == "GRU" || wrapped_layer_type_ == "CuDNNGRU")
         {
             assertion(inputs.size() == 1 || inputs.size() == 3,
                 "Invalid number of input tensors.");

-            tensor forward_state_h = inputs.size() == 3
-                ? inputs[1]
-                : is_stateful()
-                    ? forward_state_h_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
+            bool initial_state_provided = inputs.size() == 3;
+            bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_;
+            bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state;
+            // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state;
+
+            tensor forward_state_h = use_input_initial_state ? inputs[1] :
+                use_last_state_for_initial_state ? forward_state_h_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state

-            tensor backward_state_h = inputs.size() == 3
-                ? inputs[2]
-                : is_stateful()
-                    ? backward_state_h_.unsafe_get_just()
-                    : tensor(tensor_shape(n_units_), static_cast<float_type>(0));
+            tensor backward_state_h = use_input_initial_state ? inputs[2] :
+                use_last_state_for_initial_state ? backward_state_h_.unsafe_get_just() :
+                tensor(tensor_shape(n_units_), static_cast<float_type>(0)); // use_zero_initial_state

             result_forward = gru_impl(input, forward_state_h, n_units_, use_bias_, reset_after_, return_sequences_, false,
                 forward_weights_, forward_recurrent_weights_,
@@ -175,6 +180,7 @@ class bidirectional_layer : public layer
             if (is_stateful()) {
                 forward_state_h_ = forward_state_h;
                 backward_state_h_ = backward_state_h;
+                use_avail_input_state_for_stateful_ = false;
             }
         }
         else
@@ -223,6 +229,7 @@ class bidirectional_layer : public layer
     mutable fplus::maybe<tensor> forward_state_c_;
     mutable fplus::maybe<tensor> backward_state_h_;
     mutable fplus::maybe<tensor> backward_state_c_;
+    mutable bool use_avail_input_state_for_stateful_;
 };

 } // namespace internal
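For reference, a minimal standalone sketch (not part of the patch) of the initial-state selection the new flag enables. Names here are simplified assumptions: `tensor` is reduced to `std::vector<float>`, `maybe_tensor` stands in for `fplus::maybe<tensor>`, and `pick_initial_state` is a hypothetical free function, whereas the real logic sits inline in the LSTM/GRU branches above.

```cpp
#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

// Illustrative stand-ins; the real types come from frugally-deep.
using tensor = std::vector<float>;
using maybe_tensor = std::optional<tensor>;

// Mirrors the decision in the patched branches: an explicitly provided
// initial state is used on the first call, a stateful layer reuses its
// carried-over state on later calls, and otherwise zeros are used.
tensor pick_initial_state(bool initial_state_provided,
                          bool stateful,
                          bool use_avail_input_state_for_stateful,
                          const maybe_tensor& last_state,
                          const tensor& provided_state,
                          std::size_t n_units)
{
    const bool use_last_state_for_initial_state =
        stateful && !use_avail_input_state_for_stateful;
    const bool use_input_initial_state =
        initial_state_provided && !use_last_state_for_initial_state;

    if (use_input_initial_state)
        return provided_state;        // state passed in as an extra input tensor
    if (use_last_state_for_initial_state)
        return *last_state;           // state kept from the previous call
    return tensor(n_units, 0.0f);     // zero initial state
}

int main()
{
    const std::size_t n_units = 4;
    const maybe_tensor last_state = tensor(n_units, 1.0f);
    const tensor provided_state(n_units, 2.0f);

    // First call after construction/reset_states(): the provided state is used.
    assert(pick_initial_state(true, true, true, last_state, provided_state, n_units)[0] == 2.0f);

    // Later calls of a stateful layer: the carried-over state is used instead.
    assert(pick_initial_state(true, true, false, last_state, provided_state, n_units)[0] == 1.0f);
}
```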