diff --git a/src/TensorFlowNET.Core/Variables/variables.py.cs b/src/TensorFlowNET.Core/Variables/variables.py.cs
index 0c07e0243..91f57e292 100644
--- a/src/TensorFlowNET.Core/Variables/variables.py.cs
+++ b/src/TensorFlowNET.Core/Variables/variables.py.cs
@@ -72,7 +72,9 @@ public static List<IVariableV1> global_variables(string scope = null)
         public static Operation variables_initializer(IVariableV1[] var_list, string name = "init")
         {
             if (var_list.Length > 0)
+            {
                 return control_flow_ops.group(var_list.Select(x => x.Initializer).ToArray(), name);
+            }
             else
                 return gen_control_flow_ops.no_op(name: name);
         }
@@ -152,10 +154,5 @@ public static Operation _safe_initial_value_from_op(string name, Operation op, D
 
             return op;
         }
-
-        public static Tensor global_variables_initializer()
-        {
-            throw new NotImplementedException();
-        }
     }
 }
diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index fc2280051..e2d6db912 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -776,8 +776,6 @@ public void testUnconnectedGradientsNoneUnconnectedGradients()
         [TestMethod]
         public void testUnconnectedGradientsZerosUnconnectedGradients()
         {
-
-
             //def testUnconnectedGradientsZerosUnconnectedGradients(self):
             //  with ops.Graph().as_default():
             //    x = constant(1.0, shape=[2, 2])
diff --git a/test/TensorFlowNET.UnitTest/PythonTest.cs b/test/TensorFlowNET.UnitTest/PythonTest.cs
index 50cc2b328..090ef097c 100644
--- a/test/TensorFlowNET.UnitTest/PythonTest.cs
+++ b/test/TensorFlowNET.UnitTest/PythonTest.cs
@@ -6,6 +6,7 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System.Collections.Generic;
 
 namespace TensorFlowNET.UnitTest
 {
@@ -144,6 +145,40 @@ public void assertAllClose(double value, NDArray array2, double eps = 1e-5)
             Assert.IsTrue(np.allclose(array1, array2, rtol: eps));
         }
 
+        /// <summary>
+        /// Compares two boxed numeric values, treating them as equal when they
+        /// differ by less than the tolerance given at construction.
+        /// </summary>
+        private class CollectionComparer : IComparer
+        {
+            private readonly double _epsilon;
+
+            public CollectionComparer(double eps = 1e-06) => _epsilon = eps;
+
+            /// <returns>0 when |x - y| is below the tolerance; otherwise the ordinary double ordering.</returns>
+            public int Compare(object x, object y)
+            {
+                // Convert.ToDouble accepts any boxed numeric type (float, int, ...);
+                // a direct (double) unboxing cast throws InvalidCastException for those.
+                var a = Convert.ToDouble(x);
+                var b = Convert.ToDouble(y);
+
+                return Math.Abs(a - b) < _epsilon ? 0 : a.CompareTo(b);
+            }
+        }
+
+        /// <summary>
+        /// Asserts element-wise closeness of <paramref name="given"/> to <paramref name="expected"/>;
+        /// the tolerance is <paramref name="float_eps"/> when T is float and <paramref name="eps"/> otherwise.
+        /// </summary>
+        public void assertAllCloseAccordingToType<T>(
+            ICollection expected, ICollection<T> given, double eps = 1e-6, float float_eps = 1e-6f)
+        {
+            double tolerance = typeof(T) == typeof(float) ? float_eps : eps;
+            var givenAsDouble = given.Select(x => Convert.ToDouble(x)).ToArray();
+            CollectionAssert.AreEqual(expected, givenAsDouble, new CollectionComparer(tolerance));
+        }
+
         public void assertProtoEquals(object toProto, object o)
         {
             throw new NotImplementedException();
@@ -153,6 +188,20 @@ public void assertProtoEquals(object toProto, object o)
 
         #region tensor evaluation and test session
 
+        private Session _cached_session = null;
+        private Graph _cached_graph = null;
+        private object _cached_config = null;
+        private bool _cached_force_gpu = false;
+
+        /// <summary>Disposes the cached session, if any, and clears the field.</summary>
+        private void _ClearCachedSession()
+        {
+            // Assigning null unconditionally is equivalent to the guarded form:
+            // when nothing is cached the field is already null.
+            self._cached_session?.Dispose();
+            self._cached_session = null;
+        }
+
         //protected object _eval_helper(Tensor[] tensors)
         //{
         //    if (tensors == null)
@@ -196,17 +245,25 @@ public T evaluate<T>(Tensor tensor)
             //    return self._eval_helper(tensors)
             //  else:
             {
-                var sess = tf.Session();
+                var sess = tf.get_default_session();
                 var ndarray = tensor.eval(sess);
-                if (typeof(T) == typeof(double))
+                if (typeof(T) == typeof(double)
+                    || typeof(T) == typeof(float)
+                    || typeof(T) == typeof(int))
+                {
+                    result = Convert.ChangeType(ndarray, typeof(T));
+                }
+                else if (typeof(T) == typeof(double[]))
+                {
+                    result = ndarray.ToMultiDimArray<double>();
+                }
+                else if (typeof(T) == typeof(float[]))
                 {
-                    double x = ndarray;
-                    result = x;
+                    result = ndarray.ToMultiDimArray<float>();
                 }
-                else if (typeof(T) == typeof(int))
+                else if (typeof(T) == typeof(int[]))
                 {
-                    int x = ndarray;
-                    result = x;
+                    result = ndarray.ToMultiDimArray<int>();
                 }
                 else
                 {
@@ -218,9 +275,56 @@ public T evaluate<T>(Tensor tensor)
         }
 
 
-        public Session cached_session()
+        /// <summary>Returns a TensorFlow Session for use in executing tests.</summary>
+        public Session cached_session(
+            Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false)
         {
-            throw new NotImplementedException();
+            // This method behaves differently than self.session(): for performance reasons
+            // `cached_session` will by default reuse the same session within the same
+            // test. The session returned by this function will only be closed at the end
+            // of the test (in the TearDown function, i.e. Cleanup()).
+
+            // The notes below come from the Python original. In this port, GPU pinning
+            // for `force_gpu` is still a TODO in _constrain_devices_and_set_default.
+            // If `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if
+            // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as
+            // possible. If both `force_gpu` and `use_gpu` are False, all ops are pinned to
+            // the CPU.
+            // Example (Python):
+            //
+            // class MyOperatorTest(test_util.TensorFlowTestCase):
+            //   def testMyOperator(self):
+            //     with self.cached_session() as sess:
+            //       valid_input = [1.0, 2.0, 3.0, 4.0, 5.0]
+            //       result = MyOperator(valid_input).eval()
+            //       self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0])
+            //       invalid_input = [-1.0, 2.0, 7.0]
+            //       with self.assertRaisesOpError("negative input not supported"):
+            //         MyOperator(invalid_input).eval()
+
+            // Args:
+            //   graph: Optional graph to use during the returned session.
+            //   config: An optional config_pb2.ConfigProto to use to configure the
+            //     session.
+            //   use_gpu: If True, attempt to run as many ops as possible on GPU.
+            //   force_gpu: If True, pin all ops to `/device:GPU:0`.
+
+            // Returns:
+            //   The cached Session after it was made the default (null when executing
+            //   eagerly). The fixture keeps ownership: Cleanup() disposes it after the
+            //   test, so callers should not dispose it themselves.
+
+            // TODO:
+            //  if context.executing_eagerly():
+            //    return self._eval_helper(tensors)
+            //  else:
+            {
+                var sess = self._get_cached_session(
+                    graph, config, force_gpu, crash_if_inconsistent_args: true);
+                // No `using` here: disposing on return would hand the caller a dead
+                // session and break reuse; the cached session is released in Cleanup().
+                return self._constrain_devices_and_set_default(sess, use_gpu, force_gpu);
+            }
         }
 
         //Returns a TensorFlow Session for use in executing tests.
@@ -268,6 +372,40 @@ public Session session(Graph graph = null, object config = null, bool use_gpu =
             return s.as_default();
         }
 
+        private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu)
+        {
+            // Makes the session and its graph the defaults, then returns the session.
+            // Eager execution short-circuits with null before touching the session.
+            if (tf.executing_eagerly())
+            {
+                return null;
+            }
+
+            sess.graph.as_default();
+            sess.as_default();
+
+            if (force_gpu)
+            {
+                // TODO (not implemented): use the name of an actual GPU device if one
+                // is detected, or '/device:GPU:0' otherwise, and return the session
+                // from inside that device scope:
+                //   using (sess.graph.device(gpu_name)) { ... }
+                return sess;
+            }
+
+            if (use_gpu)
+            {
+                return sess;
+            }
+
+            // Neither GPU flag is set: request CPU placement. The device scope is
+            // disposed as soon as the return statement leaves the using block.
+            using (sess.graph.device("/device:CPU:0"))
+            {
+                return sess;
+            }
+        }
+
         // See session() for details.
         private Session _create_session(Graph graph, object cfg, bool forceGpu)
         {
@@ -312,6 +450,54 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu)
             return new Session(graph);//, config = prepare_config(config))
         }
 
+        /// <summary>Creates the cached session on first use; later calls return it and, when
+        /// crash_if_inconsistent_args is set, throw ValueError if the arguments changed.</summary>
+        private Session _get_cached_session(
+                          Graph graph = null,
+                          object config = null,
+                          bool force_gpu = false,
+                          bool crash_if_inconsistent_args = true)
+        {
+            if (self._cached_session == null)
+            {
+                self._cached_session = self._create_session(graph, config, force_gpu);
+                self._cached_graph = graph;
+                self._cached_config = config;
+                self._cached_force_gpu = force_gpu;
+                return self._cached_session;
+            }
+            else
+            {
+                // object.Equals(a, b) is null-safe: a cached null vs. a non-null argument is a mismatch too.
+                if (crash_if_inconsistent_args && !object.Equals(self._cached_graph, graph))
+                    throw new ValueError(@"The graph used to get the cached session is 
+                                           different than the one that was used to create the
+                                           session. Maybe create a new session with 
+                                           self.session()");
+                if (crash_if_inconsistent_args && !object.Equals(self._cached_config, config))
+                {
+                    throw new ValueError(@"The config used to get the cached session is 
+                                           different than the one that was used to create the 
+                                           session. Maybe create a new session with 
+                                           self.session()");
+                }
+                if (crash_if_inconsistent_args && self._cached_force_gpu != force_gpu)
+                {
+                    throw new ValueError(@"The force_gpu value used to get the cached session is 
+                                           different than the one that was used to create the 
+                                           session. Maybe create a new session with 
+                                           self.session()");
+                }
+                return _cached_session;
+            }
+        }
+
+        /// <summary>
+        /// Test cleanup hook: releases the session that was cached while the test ran.
+        /// </summary>
+        [TestCleanup]
+        public void Cleanup() => _ClearCachedSession();
+
         #endregion
 
         public void AssetSequenceEqual<T>(T[] a, T[] b)
diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs
new file mode 100644
index 000000000..d766890b2
--- /dev/null
+++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs
@@ -0,0 +1,119 @@
+﻿using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
+using Tensorflow;
+using Tensorflow.NumPy;
+using static Tensorflow.Binding;
+
+namespace TensorFlowNET.UnitTest.Training
+{
+    /// <summary>Tests for tf.train.GradientDescentOptimizer (V1 API) run through a cached session.</summary>
+    [TestClass]
+    public class GradientDescentOptimizerTest : PythonTest
+    {
+        // Maps float/double to the matching dtype; any other type is not supported yet.
+        private static TF_DataType GetTypeForNumericType<T>() where T : struct
+            => Type.GetTypeCode(typeof(T)) switch
+            {
+                TypeCode.Single => np.float32,
+                TypeCode.Double => np.float64,
+                _ => throw new NotImplementedException(),
+            };
+
+        /// <summary>
+        /// Applies one gradient-descent step with a scalar learning rate of 3.0 and checks the
+        /// variables before and after the update.
+        /// </summary>
+        private void TestBasic<T>() where T : struct
+        {
+            var dtype = GetTypeForNumericType<T>();
+            // train.GradientDescentOptimizer is V1 only API.
+            tf.Graph().as_default();
+            // The fixture owns the cached session and disposes it in Cleanup(), so no `using` here.
+            var sess = self.cached_session();
+
+            var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype);
+            var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype);
+            var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype);
+            var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype);
+            var optimizer = tf.train.GradientDescentOptimizer(3.0f);
+            var grads_and_vars = new[] {
+                Tuple.Create(grads0, var0 as IVariableV1),
+                Tuple.Create(grads1, var1 as IVariableV1)
+            };
+            var sgd_op = optimizer.apply_gradients(grads_and_vars);
+
+            sess.run(tf.global_variables_initializer());
+
+            // Fetch params to validate initial values
+            self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate<T[]>(var0));
+            self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate<T[]>(var1));
+            // Run 1 step of sgd
+            sgd_op.run();
+            // Validate updated params
+            self.assertAllCloseAccordingToType(
+                new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 },
+                self.evaluate<T[]>(var0));
+            self.assertAllCloseAccordingToType(
+                new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 },
+                self.evaluate<T[]>(var1));
+            // TODO: self.assertEqual(0, len(optimizer.variables()));
+        }
+
+        [TestMethod]
+        public void TestBasic()
+        {
+            //TODO: add np.half
+            TestBasic<float>();
+            TestBasic<double>();
+        }
+
+        /// <summary>
+        /// Same scenario as TestBasic, but the learning rate (3.0) is supplied as a constant
+        /// tensor instead of a scalar.
+        /// </summary>
+        private void TestTensorLearningRate<T>() where T : struct
+        {
+            var dtype = GetTypeForNumericType<T>();
+            // train.GradientDescentOptimizer is V1 only API.
+            tf.Graph().as_default();
+            // The fixture owns the cached session and disposes it in Cleanup(), so no `using` here.
+            var sess = self.cached_session();
+
+            var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype);
+            var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype);
+            var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype);
+            var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype);
+            var lrate = constant_op.constant(3.0);
+            var grads_and_vars = new[] {
+                Tuple.Create(grads0, var0 as IVariableV1),
+                Tuple.Create(grads1, var1 as IVariableV1)
+            };
+            var sgd_op = tf.train.GradientDescentOptimizer(lrate)
+                .apply_gradients(grads_and_vars);
+
+            sess.run(tf.global_variables_initializer());
+
+            // Fetch params to validate initial values
+            self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate<T[]>(var0));
+            self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate<T[]>(var1));
+            // Run 1 step of sgd
+            sgd_op.run();
+            // Validate updated params
+            self.assertAllCloseAccordingToType(
+                new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 },
+                self.evaluate<T[]>(var0));
+            self.assertAllCloseAccordingToType(
+                new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 },
+                self.evaluate<T[]>(var1));
+            // TODO: self.assertEqual(0, len(optimizer.variables()));
+        }
+
+        [TestMethod]
+        public void TestTensorLearningRate()
+        {
+            //TODO: add np.half
+            TestTensorLearningRate<float>();
+            TestTensorLearningRate<double>();
+        }
+    }
+}