From 1a40ee37adabf212c513f1603b50764f3f5dc82e Mon Sep 17 00:00:00 2001
From: Balyshev Artem <43214667+BalyshevArtem@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:44:03 +0300
Subject: [PATCH] [onert-micro] Introduce training configure tool (#13593)

This PR introduces the training configure tool for onert-micro.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev
---
 onert-micro/CMakeLists.txt                    |  11 +-
 .../training-configure-tool/CMakeLists.txt    |  27 ++
 .../TrainingConfigureTool.cpp                 | 143 +++++++
 .../include/SparseBackpropagationHandler.h    |  39 ++
 .../include/SparseBackpropagationHelper.h     |  61 +++
 .../TensorRankSparseBackpropagationHandler.h  |  44 ++
 .../include/TrainConfigData.h                 | 106 +++++
 .../include/TrainingConfigureFileHandler.h    |  49 +++
 .../include/TrainingDriverHandler.h           |  39 ++
 .../src/SparseBackpropagationHandler.cpp      | 170 ++++++++
 .../src/SparseBackpropagationHelper.cpp       | 394 ++++++++++++++++++
 ...TensorRankSparseBackpropagationHandler.cpp | 127 ++++++
 .../src/TrainingConfigureFileHandler.cpp      | 160 +++++++
 .../src/TrainingDriverHandler.cpp             | 269 ++++++++++++
 14 files changed, 1638 insertions(+), 1 deletion(-)
 create mode 100644 onert-micro/training-configure-tool/CMakeLists.txt
 create mode 100644 onert-micro/training-configure-tool/TrainingConfigureTool.cpp
 create mode 100644 onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
 create mode 100644 onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainConfigData.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainingDriverHandler.h
 create mode 100644 onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp

diff --git a/onert-micro/CMakeLists.txt b/onert-micro/CMakeLists.txt
index 7537f2955fb..c1ba692d290 100644
--- a/onert-micro/CMakeLists.txt
+++ b/onert-micro/CMakeLists.txt
@@ -174,7 +174,14 @@ endif ()
 if (DIS_DYN_SHAPES)
     message(STATUS "ONERT-MICRO will not use dynamic shapes")
     add_definitions(-DDIS_DYN_SHAPES)
-endif ()
+    list(APPEND CMAKE_ARM_OPTIONS "-DDIS_DYN_SHAPES=ON")
+endif()
+
+if (OM_MEMORY_ESTIMATE)
+    message(STATUS "ONERT-MICRO will use memory estimation")
+    add_definitions(-DOM_MEMORY_ESTIMATE)
+    list(APPEND CMAKE_ARM_OPTIONS "-DOM_MEMORY_ESTIMATE=ON")
+endif()
 
 set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
 file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
@@ -197,6 +204,7 @@ unset(KERNELS CACHE)
 unset(USE_STATIC_KERNEL CACHE)
 unset(DIS_QUANT CACHE)
 unset(DIS_FLOAT CACHE)
+unset(OM_MEMORY_ESTIMATE CACHE)
 unset(ENABLE_ONERT_MICRO_TEST CACHE)
 unset(NOT_BUILD_EXTERNALS CACHE)
 
@@ -213,6 +221,7 @@ add_custom_command(
 add_custom_target(onert_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
 
 add_subdirectory(eval-driver)
+add_subdirectory(training-configure-tool)
 
 # Should be after add_subdirectory
 unset(ENABLE_ONERT_MICRO_TRAINING CACHE)
diff --git a/onert-micro/training-configure-tool/CMakeLists.txt b/onert-micro/training-configure-tool/CMakeLists.txt
new file mode 100644
index 00000000000..41e497bffca
--- /dev/null
+++ b/onert-micro/training-configure-tool/CMakeLists.txt
@@ -0,0 +1,27 @@
+message(STATUS "START Training Config Tool")
+
+add_definitions(-DOM_MEMORY_ESTIMATE)
+
+set(TRAIN_CONFIG_TOOL_SRC
+        TrainingConfigureTool.cpp
+        src/SparseBackpropagationHandler.cpp
+        src/TensorRankSparseBackpropagationHandler.cpp
+        src/TrainingConfigureFileHandler.cpp
+        src/TrainingDriverHandler.cpp
+        src/SparseBackpropagationHelper.cpp)
+
+add_executable(train_config_tool ${TRAIN_CONFIG_TOOL_SRC})
+
+# This variable is needed to separate the standalone interpreter libraries from the libraries used in the tool
+set(CUSTOM_OM_SUFFIX "_train_config_tool")
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/onert-micro ${CMAKE_CURRENT_BINARY_DIR}/onert-micro)
+
+target_include_directories(train_config_tool PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/onert_micro/include")
+target_include_directories(train_config_tool PUBLIC "include")
+target_link_libraries(train_config_tool PUBLIC onert_micro_interpreter)
+target_include_directories(train_config_tool PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/onert_micro/include")
+target_link_libraries(train_config_tool PUBLIC onert_micro_training_interpreter)
+
+install(TARGETS train_config_tool DESTINATION bin)
+
+message(STATUS "DONE Training Config Tool")
diff --git a/onert-micro/training-configure-tool/TrainingConfigureTool.cpp b/onert-micro/training-configure-tool/TrainingConfigureTool.cpp
new file mode 100644
index 00000000000..d5ca00dfd4b
--- /dev/null
+++ b/onert-micro/training-configure-tool/TrainingConfigureTool.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "include/SparseBackpropagationHandler.h"
+#include "include/TensorRankSparseBackpropagationHandler.h"
+
+#include "TrainingDriverHandler.h"
+
+#include <iostream>
+#include <cstdlib>
+#include <stdexcept>
+
+int entry(int argc, char **argv)
+{
+  if (argc != 9 and argc != 10)
+  {
+    std::cerr << "Two variants of usage, with and without a wof file: " << argv[0]
+              << " <circle_model_path>"
+                 " optional(<wof_file_path>)"
+                 " <output_tool_file_path>"
+                 " <input_train_data_path>"
+                 " <target_train_data_path>"
+                 " <input_test_data_path>"
+                 " <target_test_data_path>"
+                 " num_of_train_smpl"
+                 " num_of_test_smpl\n";
+    return EXIT_FAILURE;
+  }
+
+  training_configure_tool::TrainData train_data;
+
+  if (argc == 10)
+  {
+    train_data.circle_model_path = argv[1];
+    train_data.wof_file_path = argv[2];
+    train_data.output_tool_file_path = argv[3];
+    train_data.input_input_train_data_path = argv[4];
+    train_data.input_target_train_data_path = argv[5];
+    train_data.input_input_test_data_path = argv[6];
+    train_data.input_target_test_data_path = argv[7];
+    train_data.num_train_data_samples = atoi(argv[8]);
+    train_data.num_test_data_samples = atoi(argv[9]);
+  }
+  else if (argc == 9)
+  {
+    train_data.circle_model_path = argv[1];
+    train_data.output_tool_file_path = argv[2];
+    train_data.input_input_train_data_path = argv[3];
+    train_data.input_target_train_data_path = argv[4];
+    train_data.input_input_test_data_path = argv[5];
+    train_data.input_target_test_data_path = argv[6];
+    train_data.num_train_data_samples = atoi(argv[7]);
+    train_data.num_test_data_samples = atoi(argv[8]);
+  }
+  else
+  {
+    throw std::runtime_error("Unknown number of command line arguments\n");
+  }
+
+  // Configure training mode
+  onert_micro::OMConfig config;
+
+  // Set user defined training settings
+  const uint32_t training_epochs = 25;
+  const float lambda = 0.001f;
+  const uint32_t BATCH_SIZE = 64;
+  const uint32_t num_train_layers = 0;
+  const onert_micro::OMLoss loss = onert_micro::CROSS_ENTROPY;
+  const onert_micro::OMTrainOptimizer train_optimizer = onert_micro::ADAM;
+  const float beta = 0.9;
+  const float beta_squares = 0.999;
+  const float epsilon = 1e-07;
+
+  config.train_mode = true;
+  {
+    onert_micro::OMTrainingContext train_context;
+    train_context.batch_size = BATCH_SIZE;
+    train_context.num_of_train_layers = num_train_layers;
+    train_context.learning_rate = lambda;
+    train_context.loss = loss;
+    train_context.optimizer = train_optimizer;
+    train_context.beta = beta;
+    train_context.beta_squares = beta_squares;
+    train_context.epsilon = epsilon;
+    train_context.epochs = training_epochs;
+
+    config.training_context = train_context;
+  }
+
+  train_data.metrics_to_check_best_config = onert_micro::CROSS_ENTROPY_METRICS;
+  train_data.memory_above_restriction = 300000;
+  train_data.acceptable_diff = 0.02;
+
+  // Find the best sparse backpropagation configuration
+  std::unordered_set<uint16_t> best_trainable_op_indexes;
+  training_configure_tool::findBestTrainableOpIndexes(config, train_data,
+                                                      best_trainable_op_indexes);
+
+  // Find the best train tensors ranks
+  training_configure_tool::TrainConfigFileData config_result;
+  auto res = training_configure_tool::findBestSparseBackpropagationTensorsRanks(
+    config, train_data, best_trainable_op_indexes, config_result.trainable_op_indexes_with_ranks);
+
+  // Save the result into a file
+  assert(!config_result.trainable_op_indexes_with_ranks.empty());
+  training_configure_tool::createResultFile(config_result, train_data.output_tool_file_path);
+
+  return EXIT_SUCCESS;
+}
+
+int entry(int argc, char **argv);
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+  try
+  {
+    return entry(argc, argv);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "ERROR: " << e.what() << std::endl;
+  }
+
+  return 255;
+}
+#else // NDEBUG
+int main(int argc, char **argv)
+{
+  // NOTE main does not catch internal exceptions for debug build to make it easy to
+  // check the stacktrace with a debugger
+  return entry(argc, argv);
+}
+#endif // !NDEBUG
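For reference, a hypothetical invocation of the 9-argument variant looks like this (the file names and sample counts below are made up for illustration; the 10-argument variant simply inserts the wof file path as the second argument):

    ./train_config_tool model.circle train_config.bin train_input.bin train_target.bin test_input.bin test_target.bin 1000 200

The tool first searches for the best set of trainable operators, then refines the per-operator train ranks, and finally writes the resulting train config into train_config.bin.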
diff --git a/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h b/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
new file mode 100644
index 00000000000..3e03777480d
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <unordered_set>
+
+namespace training_configure_tool
+{
+
+/*
+ * Method to find the set of operator indexes that is the most trainable, that is, the set which
+ * achieves the best metric result.
+ */
+onert_micro::OMStatus
+findBestTrainableOpIndexes(onert_micro::OMConfig &config, TrainData &train_data,
+                           std::unordered_set<uint16_t> &best_trainable_op_indexes);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
diff --git a/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h b/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
new file mode 100644
index 00000000000..3c355dca3f0
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+
+#include <unordered_set>
+#include <vector>
+
+namespace training_configure_tool
+{
+
+// Check whether the left train result is better than the right one in terms of metric result and
+// memory consumption.
+// acceptable_diff - the acceptable difference in metric values used to pick the result with the
+// better memory footprint.
+bool cmpTrainResults(const training_configure_tool::TrainResult &left,
+                     const training_configure_tool::TrainResult &right,
+                     const float acceptable_diff);
+
+// Find all trainable op indexes in the model - initial_train_op_indexes
+std::unordered_set<uint16_t> findAllTrainableOps(const char *circle_model_path);
+
+// Generate all possible sets from initial_train_op_indexes
+std::vector<std::unordered_set<uint16_t>>
+generateAllPossibleOpIndexesSets(const std::unordered_set<uint16_t> &initial_train_op_indexes);
+
+// Remove operation indexes sets with a peak memory footprint greater than the given restriction:
+// 1 - Run the train interpreter with all these sets with a single train sample and a single test
+//     sample to obtain an approximate peak memory footprint for each set.
+// 2 - Cut according to the max peak memory.
+std::vector<std::unordered_set<uint16_t>> selectOpIndexesSetsAccordingToMemoryRestriction(
+  const std::vector<std::unordered_set<uint16_t>> &op_indexes_sets, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data);
+
+// Find all combinations of train ranks for the currently selected op indexes.
+// Returns a vector of all possible combinations of train rank for every op.
+std::vector<std::unordered_map<uint16_t, OpTrainableRank>>
+findAllTensorsRanksCombinations(const std::unordered_set<uint16_t> &selected_op_indexes,
+                                onert_micro::OMConfig config,
+                                training_configure_tool::TrainData train_data);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
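To make the helper contracts above concrete: generateAllPossibleOpIndexesSets enumerates the power set (minus the empty set) of the trainable operator indexes, so n trainable operators produce 2^n - 1 candidate sets. A minimal standalone sketch of that include/exclude recursion (illustrative only, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

static void enumerate(const std::vector<uint16_t> &ops, size_t pos, std::set<uint16_t> &cur,
                      std::vector<std::set<uint16_t>> &out)
{
  if (pos == ops.size())
  {
    if (!cur.empty())
      out.push_back(cur); // keep only non-empty subsets
    return;
  }
  // branch 1: include ops[pos]
  cur.insert(ops[pos]);
  enumerate(ops, pos + 1, cur, out);
  // branch 2: exclude ops[pos]
  cur.erase(ops[pos]);
  enumerate(ops, pos + 1, cur, out);
}

int main()
{
  const std::vector<uint16_t> ops = {1, 3}; // two trainable operator indexes
  std::vector<std::set<uint16_t>> sets;
  std::set<uint16_t> cur;
  enumerate(ops, 0, cur, sets);
  // prints 2^2 - 1 = 3 subsets: { 1 3 }, { 1 }, { 3 }
  for (const auto &s : sets)
  {
    std::printf("{ ");
    for (auto i : s)
      std::printf("%d ", i);
    std::printf("}\n");
  }
  return 0;
}

Because the candidate count grows exponentially, the memory-restriction pruning declared above also keeps the number of full training runs tractable.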
diff --git a/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h b/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
new file mode 100644
index 00000000000..c1da3a954c9
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <unordered_set>
+#include <unordered_map>
+
+namespace training_configure_tool
+{
+
+/*
+ * Method to find the train ranks that are the most trainable (that is, which get the best metric
+ * result and the lowest peak memory) for every operation in the selected operator indexes.
+ * Note: a train rank is an indicator of how much data of the current operation we will train
+ * (for example, the entire operation, only the bias, only the upper half, and so on).
+ */
+onert_micro::OMStatus findBestSparseBackpropagationTensorsRanks(
+  onert_micro::OMConfig &config, TrainData &train_data,
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  std::unordered_map<uint16_t, OpTrainableRank> &best_train_ranks);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
diff --git a/onert-micro/training-configure-tool/include/TrainConfigData.h b/onert-micro/training-configure-tool/include/TrainConfigData.h
new file mode 100644
index 00000000000..5d3dff06c4d
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainConfigData.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
+
+#include "OMConfig.h"
+
+#include <unordered_map>
+#include <utility>
+#include <limits>
+#include <cassert>
+#include <cstdint>
+
+namespace training_configure_tool
+{
+
+// Enum to indicate the degree (rank) of the part of the operation we will train:
+// this is an indicator of how much data of the current operation we will train
+// (for example, the entire operation, only the bias, only the upper half, and so on)
+enum OpTrainableRank
+{
+  ALL = 0,            // 0 - Train all weights in the operation
+  ONLY_BIAS = 1,      // 1 - Train bias only in the operation
+  UP_1_2_PART = 2,    // 2 - Train the upper 1/2 part of the operation
+  LOWER_1_2_PART = 3, // 3 - Train the lower 1/2 part of the operation
+  MAX_VALUE = 4,
+  // TODO add more
+};
+
+// Information for saving the data necessary for training.
+// metrics_to_check_best_config - the metric by which the best configuration will be selected.
+// acceptable_diff - the acceptable difference in metric values when selecting the best result in
+// memory terms.
+// memory_above_restriction - the upper limit of memory usage that cannot be exceeded
+struct TrainData
+{
+  const char *circle_model_path = nullptr;
+  const char *wof_file_path = nullptr;
+  const char *output_tool_file_path = nullptr;
+  const char *input_input_train_data_path = nullptr;
+  const char *input_target_train_data_path = nullptr;
+  const char *input_input_test_data_path = nullptr;
+  const char *input_target_test_data_path = nullptr;
+  int32_t num_train_data_samples = 0;
+  int32_t num_test_data_samples = 0;
+  onert_micro::OMMetrics metrics_to_check_best_config = {};
+  float acceptable_diff = 0.01;
+  size_t memory_above_restriction = 0;
+};
+
+// Struct to save the data which will be written to the result file
+struct TrainConfigFileData
+{
+  std::unordered_map<uint16_t, OpTrainableRank> trainable_op_indexes_with_ranks;
+};
+
+// Information that is the result of training.
+// best_metrics_results - the best metric result obtained during training.
+// peak_memory_footprint - the peak memory footprint observed during training.
+struct TrainResult
+{
+  std::pair<onert_micro::OMMetrics, float> best_metrics_results = {};
+  size_t peak_memory_footprint = 0;
+
+  TrainResult() = default;
+  explicit TrainResult(TrainData train_data)
+  {
+    peak_memory_footprint = std::numeric_limits<size_t>::max();
+    switch (train_data.metrics_to_check_best_config)
+    {
+      case onert_micro::ACCURACY:
+        best_metrics_results = {onert_micro::ACCURACY, 0.f};
+        break;
+      case onert_micro::CROSS_ENTROPY_METRICS:
+        best_metrics_results = {onert_micro::CROSS_ENTROPY_METRICS,
+                                std::numeric_limits<float>::max()};
+        break;
+      case onert_micro::MSE_METRICS:
+        best_metrics_results = {onert_micro::MSE_METRICS, std::numeric_limits<float>::max()};
+        break;
+      case onert_micro::MAE_METRICS:
+        best_metrics_results = {onert_micro::MAE_METRICS, std::numeric_limits<float>::max()};
+        break;
+      default:
+        assert(false && "Unsupported type");
+        break;
+    }
+  }
+};
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
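As a usage illustration of these structures (a sketch with made-up operator indexes; createResultFile is declared in TrainingConfigureFileHandler.h just below), this encodes "train only the bias of op 0 and the upper half of op 4" and writes it to a file:

#include "TrainConfigData.h"
#include "TrainingConfigureFileHandler.h"

int main()
{
  training_configure_tool::TrainConfigFileData data;
  data.trainable_op_indexes_with_ranks[0] = training_configure_tool::ONLY_BIAS;
  data.trainable_op_indexes_with_ranks[4] = training_configure_tool::UP_1_2_PART;

  // Writes the binary train config (magic, schema, indexes, ranks) to disk
  return training_configure_tool::createResultFile(data, "train_config.bin") == onert_micro::Ok
           ? 0
           : 1;
}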
diff --git a/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h b/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
new file mode 100644
index 00000000000..1de10e68477
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
+
+#include "OMStatus.h"
+#include "TrainConfigData.h"
+
+#include <vector>
+#include <string>
+#include <cstddef>
+
+namespace training_configure_tool
+{
+
+using DataBuffer = std::vector<char>;
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size,
+                      size_t start_position = 0);
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size);
+
+DataBuffer readFile(const char *path);
+
+// Save train config data into a file
+onert_micro::OMStatus createResultFile(const TrainConfigFileData &train_data,
+                                       const char *save_path);
+
+// Save train config data into a buffer
+onert_micro::OMStatus createResultData(const TrainConfigFileData &train_data,
+                                       std::vector<char> &result_buffer);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
diff --git a/onert-micro/training-configure-tool/include/TrainingDriverHandler.h b/onert-micro/training-configure-tool/include/TrainingDriverHandler.h
new file mode 100644
index 00000000000..9205347aaac
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainingDriverHandler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
+
+#include "TrainConfigData.h"
+#include "OMConfig.h"
+#include "OMStatus.h"
+
+#include <vector>
+#include <cstdint>
+
+namespace training_configure_tool
+{
+
+// Start training with the currently set conditions and the current configuration, and save the
+// result
+onert_micro::OMStatus
+runTrainProcessWithCurConfig(onert_micro::OMConfig &config,
+                             const training_configure_tool::TrainData &train_data,
+                             TrainResult &train_result);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
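A minimal sketch of how the driver handler is meant to be called (illustrative; it assumes config and train_data are filled the same way as in TrainingConfigureTool.cpp above):

#include "TrainingDriverHandler.h"

// Illustrative: run one training experiment with the given config and inspect the outcome.
void runOneExperiment(onert_micro::OMConfig &config,
                      const training_configure_tool::TrainData &train_data)
{
  training_configure_tool::TrainResult result(train_data);
  training_configure_tool::runTrainProcessWithCurConfig(config, train_data, result);
  // result.best_metrics_results holds {metric, best value over all epochs};
  // result.peak_memory_footprint holds the peak memory used by the interpreter.
}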
diff --git a/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp b/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
new file mode 100644
index 00000000000..190aef7514b
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SparseBackpropagationHandler.h"
+#include "SparseBackpropagationHelper.h"
+#include "TrainingDriverHandler.h"
+
+#include <cstdio>
+#include <vector>
+
+#define MODEL_TYPE float
+
+#define PRINT 0
+
+using namespace onert_micro;
+
+OMStatus training_configure_tool::findBestTrainableOpIndexes(
+  OMConfig &config, training_configure_tool::TrainData &train_data,
+  std::unordered_set<uint16_t> &best_trainable_op_indexes)
+{
+  // Clear to find best values
+  best_trainable_op_indexes.clear();
+  // 1 - Find all trainable op indexes in the model - initial_train_op_indexes
+  // 2 - Generate all possible sets from initial_train_op_indexes
+  // 3 - If a memory restriction is defined, then remove operation indexes sets with a peak
+  //     memory footprint greater than the given restriction
+  // 4 - Try all found sets to find the best metrics results
+
+  // 1 - Find all trainable op indexes in the model - initial_train_op_indexes
+  std::unordered_set<uint16_t> initial_train_op_indexes =
+    training_configure_tool::findAllTrainableOps(train_data.circle_model_path);
+  assert(!initial_train_op_indexes.empty());
+  if (initial_train_op_indexes.empty())
+    return UnknownError;
+#if PRINT
+  printf("Found the following trainable indexes in the model: ");
+  for (auto i : initial_train_op_indexes)
+  {
+    printf("%d ", i);
+  }
+  printf("\n");
+#endif
+
+  // 2 - Generate all possible sets from initial_train_op_indexes
+  std::vector<std::unordered_set<uint16_t>> all_possible_train_op_indexes_sets =
+    training_configure_tool::generateAllPossibleOpIndexesSets(initial_train_op_indexes);
+  assert(all_possible_train_op_indexes_sets.empty() == false);
+  if (all_possible_train_op_indexes_sets.empty() == true)
+    return UnknownError;
+#if PRINT
+  printf("Found %zu unique trainable op indexes sets in the model:\n",
+         all_possible_train_op_indexes_sets.size());
+  for (const auto &s : all_possible_train_op_indexes_sets)
+  {
+    printf("Op indexes set = { ");
+    for (auto i : s)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+  }
+#endif
+  // Clear the initial set since it is no longer needed
+  initial_train_op_indexes.clear();
+
+  // 3 - If a memory restriction is defined, then save only the sets with peak memory less than
+  //     the restriction
+  std::vector<std::unordered_set<uint16_t>> selected_op_indexes_sets =
+    training_configure_tool::selectOpIndexesSetsAccordingToMemoryRestriction(
+      all_possible_train_op_indexes_sets, config, train_data);
+#if PRINT
+  printf("Found %zu selected op indexes sets:\n", selected_op_indexes_sets.size());
+  for (const auto &s : selected_op_indexes_sets)
+  {
+    printf("Op indexes set = { ");
+    for (auto i : s)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+  }
+#endif
+  // Clear the object that is no longer needed
+  all_possible_train_op_indexes_sets.clear();
+
+  // 4 - Try all found sets to find the best metrics results
+  // To save best values
+  TrainResult best_train_result(train_data);
+  for (const auto &index_set : selected_op_indexes_sets)
+  {
+#if PRINT
+    printf("Current checked op indexes set = { ");
+    for (auto i : index_set)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+#endif
+
+    // Construct mapping with current indexes - use default train ALL parts
+    std::unordered_map<uint16_t, OpTrainableRank> train_op_ranks;
+    for (auto index : index_set)
+    {
+      train_op_ranks[index] = ALL;
+    }
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({train_op_ranks}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result(train_data);
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+
+#if PRINT
+    printf("Found the following result:\n");
+    if (train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+    {
+      printf("CROSS_ENTROPY_METRIC = %f\n", train_result.best_metrics_results.second);
+      printf("PEAK_MEMORY_RESULT = %zu\n", train_result.peak_memory_footprint);
+    }
+#endif
+
+    // Compare with the best result so far
+    bool cmp_result = cmpTrainResults(train_result, best_train_result, train_data.acceptable_diff);
+    if (cmp_result)
+    {
+      // Current result is better
+#if PRINT
+      printf("BETTER RESULT\n");
+#endif
+      best_train_result = train_result;
+      best_trainable_op_indexes = index_set;
+    }
+  }
+
+#if PRINT
+  printf("FINISH\n");
+
+  printf("Best op indexes set = { ");
+  for (auto i : best_trainable_op_indexes)
+  {
+    printf("%d ", i);
+  }
+  printf("}\n");
+
+  printf("Found the following result:\n");
+  if (best_train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+  {
+    printf("CROSS_ENTROPY_METRIC = %f\n", best_train_result.best_metrics_results.second);
+    printf("PEAK_MEMORY_RESULT = %zu\n", best_train_result.peak_memory_footprint);
+  }
+#endif
+
+  return Ok;
+}
diff --git a/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp b/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
new file mode 100644
index 00000000000..144ea9ee2e2
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SparseBackpropagationHelper.h"
+#include "TrainingConfigureFileHandler.h"
+#include "TrainingDriverHandler.h"
+#include "TrainingConfigureFileHandler.h"
+#include "core/reader/OMCircleReader.h"
+
+#include <algorithm>
+#include <cmath>
+
+#define MODEL_TYPE float
+#define PRINT 0
+
+using namespace onert_micro;
+
+namespace
+{
+
+bool isTrainableWeights(const circle::OperatorCode *opcode)
+{
+  switch (opcode->builtin_code())
+  {
+    case circle::BuiltinOperator_FULLY_CONNECTED:
+    case circle::BuiltinOperator_CONV_2D:
+      return true;
+    default:
+      return false;
+  }
+}
+
+void generateRankCombinations(
+  const std::unordered_map<uint16_t, std::unordered_set<uint16_t>> &index_to_possible_ranks,
+  const std::vector<uint16_t> &indices, size_t currentIndex,
+  std::vector<uint16_t> &currentCombination, std::vector<std::vector<uint16_t>> &result)
+{
+  if (currentIndex == indices.size())
+  {
+    result.push_back(currentCombination);
+    return;
+  }
+  uint16_t index = indices[currentIndex];
+  for (uint16_t rank : index_to_possible_ranks.at(index))
+  {
+    currentCombination.push_back(rank);
+    generateRankCombinations(index_to_possible_ranks, indices, currentIndex + 1,
+                             currentCombination, result);
+    currentCombination.pop_back();
+  }
+}
+
+// Find pairs: selected op indexes - divided dim max rank value
+std::unordered_map<uint16_t, uint32_t> findTrainableTensorsMaxDivideRankAccordingToOperatorIndex(
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  const onert_micro::core::reader::OMCircleReader &reader)
+{
+  std::unordered_map<uint16_t, uint32_t> operator_index_to_tensor_max_divide_rank;
+
+  // Read ops
+  auto operators = reader.operators();
+  assert(operators != nullptr);
+
+  auto op_size = operators->size();
+
+  // Obtain operation codes
+  auto op_codes = reader.opcodes();
+
+  const auto tensors = reader.tensors();
+
+  // Go over selected best op indexes
+  for (auto op_index : selected_op_indexes)
+  {
+    auto cur_op = operators->operator[](op_index);
+
+    // Get opcode index
+    uint32_t cur_opcode_index = cur_op->opcode_index();
+    assert(cur_opcode_index < op_codes->size());
+
+    const auto opcode = op_codes->operator[](cur_opcode_index);
+
+    const auto inputs_tensors = cur_op->inputs();
+
+    uint32_t tensor_divided_dim_value = 0;
+
+    switch (opcode->builtin_code())
+    {
+      case circle::BuiltinOperator_FULLY_CONNECTED:
+      case circle::BuiltinOperator_CONV_2D:
+      {
+        assert(inputs_tensors->size() >= 2);
+        auto tensor_index = inputs_tensors->operator[](1);
+        assert(tensor_index != -1);
+        assert(tensor_index < tensors->size());
+        auto tensor = tensors->operator[](tensor_index);
+
+        // For FC and Conv2D ops the tool provides the rank over the 0th dimension
+        tensor_divided_dim_value = tensor->shape()->operator[](0);
+
+        break;
+      }
+      default:
+        assert(false && "Unsupported type");
+        tensor_divided_dim_value = 0; // Not supported
+    }
+
+    assert(tensor_divided_dim_value != 0);
+    operator_index_to_tensor_max_divide_rank[op_index] = tensor_divided_dim_value;
+  }
+
+  return operator_index_to_tensor_max_divide_rank;
+}
+
+void recursiveGenerateAllPossibleOpIndexesSetsHelper(
+  std::vector<std::unordered_set<uint16_t>> &result, std::unordered_set<uint16_t> &cur_set,
+  std::unordered_set<uint16_t>::const_iterator cur_it_set_value,
+  std::unordered_set<uint16_t>::const_iterator &end_it_set)
+{
+  // If we reach the end of the initial set, then finish
+  if (cur_it_set_value == end_it_set)
+  {
+    // If the set is not empty, add it to the final result
+    if (cur_set.empty() == false)
+      result.push_back(cur_set);
+    return;
+  }
+
+  // Add value to the current set
+  uint16_t cur_index = *cur_it_set_value;
+  cur_set.insert(cur_index);
+  // Run further and move the iterator to the next position
+  cur_it_set_value++;
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, cur_it_set_value, end_it_set);
+  // Remove the current index from the set
+  cur_set.erase(cur_index);
+  // Run the recursion again, now without the current index
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, cur_it_set_value, end_it_set);
+}
+
+} // namespace
+
+// Check whether the left train result is better than the right one
+bool training_configure_tool::cmpTrainResults(const training_configure_tool::TrainResult &left,
+                                              const training_configure_tool::TrainResult &right,
+                                              const float acceptable_diff)
+{
+  // Metrics should be the same
+  assert(left.best_metrics_results.first == right.best_metrics_results.first);
+  OMMetrics metric = left.best_metrics_results.first;
+  float left_metric_res = left.best_metrics_results.second;
+  float right_metric_res = right.best_metrics_results.second;
+
+  bool is_in_acceptable_diff = std::abs(left_metric_res - right_metric_res) <= acceptable_diff;
+  if (is_in_acceptable_diff)
+  {
+    return left.peak_memory_footprint < right.peak_memory_footprint;
+  }
+
+  switch (metric)
+  {
+    case onert_micro::ACCURACY:
+    {
+      return left.best_metrics_results.second > right.best_metrics_results.second;
+    }
+    break;
+    case onert_micro::CROSS_ENTROPY_METRICS:
+    case onert_micro::MSE_METRICS:
+    case onert_micro::MAE_METRICS:
+    {
+      return left.best_metrics_results.second < right.best_metrics_results.second;
+    }
+    break;
+    default:
+      assert(false && "Unsupported type");
+      break;
+  }
+  return true;
+}
+
+// Remove operation indexes sets with a peak memory footprint greater than the given restriction:
+// 1 - Run the train interpreter with all these sets with a single train sample and a single test
+//     sample to obtain an approximate peak memory footprint for each set
+// 2 - Cut according to the max peak memory
+std::vector<std::unordered_set<uint16_t>>
+training_configure_tool::selectOpIndexesSetsAccordingToMemoryRestriction(
+  const std::vector<std::unordered_set<uint16_t>> &op_indexes_sets, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data)
+{
+  // If it is 0, then the restriction is not set
+  if (train_data.memory_above_restriction == 0)
+  {
+    return op_indexes_sets;
+  }
+
+  std::vector<std::unordered_set<uint16_t>> result;
+
+  // To obtain a realistic estimation, batch_size = 2 and num_train_data_samples = 4 are enough
+  // Change config train and test sample values
+  train_data.num_test_data_samples = 0;
+  train_data.num_train_data_samples = std::min(4, train_data.num_train_data_samples);
+  // To disable tests
+  train_data.metrics_to_check_best_config = NONE;
+  // Set the number of epochs to one and clamp the batch size
+  config.training_context.epochs = 1;
+  config.training_context.batch_size = std::min(2u, config.training_context.batch_size);
+
+  for (const auto &op_indexes_set : op_indexes_sets)
+  {
+#if PRINT
+    printf("Start checking: { ");
+    for (auto i : op_indexes_set)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+#endif
+    // Construct mapping with current indexes - use default train ALL parts
+    std::unordered_map<uint16_t, OpTrainableRank> train_op_ranks;
+    for (auto index : op_indexes_set)
+    {
+      train_op_ranks[index] = ALL;
+    }
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({train_op_ranks}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result;
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+#if PRINT
+    printf("CURRENT MEMORY PEAK = %zu\n", train_result.peak_memory_footprint);
+#endif
+    if (train_result.peak_memory_footprint < train_data.memory_above_restriction)
+    {
+#if PRINT
+      printf("Added to the result\n");
+#endif
+      result.push_back(op_indexes_set);
+    }
+  }
+
+  return result;
+}
+
+// Generate all possible sets from initial_train_op_indexes
+std::vector<std::unordered_set<uint16_t>>
+training_configure_tool::generateAllPossibleOpIndexesSets(
+  const std::unordered_set<uint16_t> &initial_train_op_indexes)
+{
+  std::vector<std::unordered_set<uint16_t>> result;
+  std::unordered_set<uint16_t> cur_set;
+
+  auto begin_it = initial_train_op_indexes.begin();
+  auto end_it = initial_train_op_indexes.end();
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, begin_it, end_it);
+
+  return result;
+}
+
+// Find all trainable op indexes in the model - initial_train_op_indexes
+std::unordered_set<uint16_t>
+training_configure_tool::findAllTrainableOps(const char *circle_model_path)
+{
+  std::unordered_set<uint16_t> result;
+
+  training_configure_tool::DataBuffer model_ptr =
+    training_configure_tool::readFile(circle_model_path);
+
+  // Init reader
+  OMStatus status = Ok;
+  core::reader::OMCircleReader reader;
+  assert(model_ptr.data() != nullptr);
+  status = reader.parse(model_ptr.data());
+  assert(status == Ok);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+  // TODO: support multi subgraph models
+  status = reader.select_subgraph(0);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+
+  // Read ops
+  auto operators = reader.operators();
+  assert(operators != nullptr);
+
+  auto op_size = operators->size();
+
+  // Obtain operation codes
+  auto op_codes = reader.opcodes();
+
+  // Run through all ops
+  for (uint32_t i = 0; i < op_size; ++i)
+  {
+    auto cur_op = operators->operator[](i);
+
+    // Get opcode index
+    uint32_t cur_opcode_index = cur_op->opcode_index();
+    assert(cur_opcode_index < op_codes->size());
+
+    const auto opcode = op_codes->operator[](cur_opcode_index);
+
+    // If the op is trainable - insert it
+    if (isTrainableWeights(opcode))
+      result.insert(static_cast<uint16_t>(i));
+  }
+
+  return result;
+}
+
+std::vector<std::unordered_map<uint16_t, OpTrainableRank>>
+training_configure_tool::findAllTensorsRanksCombinations(
+  const std::unordered_set<uint16_t> &selected_op_indexes, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data)
+{
+  // 1 - Find pairs: selected op indexes - divided dim max rank value
+  // 2 - Find for every tensor index every possible rank according to its opcode and size
+  // 3 - Get the result
+  std::vector<std::unordered_map<uint16_t, OpTrainableRank>> result;
+
+  training_configure_tool::DataBuffer model_ptr =
+    training_configure_tool::readFile(train_data.circle_model_path);
+
+  // Init reader
+  OMStatus status = Ok;
+  core::reader::OMCircleReader reader;
+  assert(model_ptr.data() != nullptr);
+  status = reader.parse(model_ptr.data());
+  assert(status == Ok);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+  // TODO: support multi subgraph models
+  status = reader.select_subgraph(0);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+
+  // 1 - Find pairs: selected op indexes - divided dim max rank value
+  std::unordered_map<uint16_t, uint32_t> operator_index_to_tensor_index =
+    findTrainableTensorsMaxDivideRankAccordingToOperatorIndex(selected_op_indexes, reader);
+  assert(operator_index_to_tensor_index.size() == selected_op_indexes.size());
+  // 2 - Find for every tensor index every possible rank according to its opcode and size
+  std::unordered_map<uint16_t, std::unordered_set<uint16_t>> op_index_to_all_possible_ranks;
+  for (auto &p : operator_index_to_tensor_index)
+  {
+    const auto op_index = p.first;
+    const auto max_value = p.second;
+
+    uint16_t cur_value = 2;
+    op_index_to_all_possible_ranks[op_index] = {ALL, ONLY_BIAS};
+    while (cur_value < uint32_t(OpTrainableRank::MAX_VALUE) and cur_value <= max_value)
+    {
+      auto new_value = cur_value * 2;
+      while (cur_value < uint16_t(OpTrainableRank::MAX_VALUE) and cur_value < new_value)
+      {
+        op_index_to_all_possible_ranks[op_index].insert(cur_value);
+        cur_value++;
+      }
+    }
+  }
+  // Get all op indices
+  std::vector<uint16_t> indices(selected_op_indexes.begin(), selected_op_indexes.end());
+  std::vector<std::vector<uint16_t>> rank_combinations;
+  std::vector<uint16_t> cur_v;
+  generateRankCombinations(op_index_to_all_possible_ranks, indices, 0, cur_v, rank_combinations);
+
+  for (const auto &ranks : rank_combinations)
+  {
+    std::unordered_map<uint16_t, OpTrainableRank> combination;
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+      combination[indices[i]] = OpTrainableRank(ranks[i]);
+    }
+    result.push_back(std::move(combination));
+  }
+
+  return result;
+}
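A worked example of the enumeration above (numbers made up): for selected ops {2, 7} whose weight tensors have a first dimension of at least 2, each op gets the candidate rank codes {0, 1, 2, 3} (ALL, ONLY_BIAS, UP_1_2_PART, LOWER_1_2_PART), and findAllTensorsRanksCombinations returns the cartesian product of 4 x 4 = 16 maps, from { 2 : ALL, 7 : ALL } through { 2 : LOWER_1_2_PART, 7 : LOWER_1_2_PART }. Each map is then serialized via createResultData and scored with a full train/evaluate cycle by the rank handler in the next file.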
diff --git a/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp b/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
new file mode 100644
index 00000000000..b03ee8164a7
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorRankSparseBackpropagationHandler.h"
+#include "SparseBackpropagationHelper.h"
+#include "TrainingDriverHandler.h"
+
+#include <cstdio>
+#include <vector>
+
+#define MODEL_TYPE float
+
+#define PRINT 0
+
+using namespace onert_micro;
+
+namespace
+{
+
+} // namespace
+
+OMStatus training_configure_tool::findBestSparseBackpropagationTensorsRanks(
+  onert_micro::OMConfig &config, TrainData &train_data,
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  std::unordered_map<uint16_t, OpTrainableRank> &best_train_ranks)
+{
+  // Clear to find best values
+  best_train_ranks.clear();
+
+  // 1 - Find all combinations of ranks for the currently selected op indexes
+  // 2 - Run all of them to find the best variant
+
+  // 1 - Find all combinations of ranks for the currently selected op indexes
+  std::vector<std::unordered_map<uint16_t, OpTrainableRank>> all_combinations =
+    findAllTensorsRanksCombinations(selected_op_indexes, config, train_data);
+
+#if PRINT
+  printf("All combinations: op_index : rank_value; { \n");
+  for (const auto &combination : all_combinations)
+  {
+    for (auto &p : combination)
+    {
+      printf("(%d : %d); ", p.first, p.second);
+    }
+    printf("\n");
+  }
+  printf("}\n");
+
+#endif // PRINT
+
+  // 2 - Run all of them to find the best variant
+  TrainResult best_train_result(train_data);
+  for (const auto &combination : all_combinations)
+  {
+#if PRINT
+    printf("Current checked combination: op_index : rank_value; { ");
+    for (auto &p : combination)
+    {
+      printf("(%d : %d); ", p.first, p.second);
+    }
+    printf("}\n");
+#endif
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({combination}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result(train_data);
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+
+#if PRINT
+    printf("Found the following result:\n");
+    if (train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+    {
+      printf("CROSS_ENTROPY_METRIC = %f\n", train_result.best_metrics_results.second);
+      printf("PEAK_MEMORY_RESULT = %zu\n", train_result.peak_memory_footprint);
+    }
+#endif
+
+    // Compare with the best result so far
+    bool cmp_result = cmpTrainResults(train_result, best_train_result, train_data.acceptable_diff);
+    if (cmp_result)
+    {
+      // Current result is better
+#if PRINT
+      printf("BETTER RESULT\n");
+#endif
+      best_train_result = train_result;
+      best_train_ranks = combination;
+    }
+  }
+
+#if PRINT
+  printf("FINISH\n");
+
+  printf("Best rank combination: op_index : rank_value; { ");
+  for (auto &p : best_train_ranks)
+  {
+    printf("(%d : %d); ", p.first, p.second);
+  }
+  printf("}\n");
+
+  printf("Found the following result:\n");
+  if (best_train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+  {
+    printf("CROSS_ENTROPY_METRIC = %f\n", best_train_result.best_metrics_results.second);
+    printf("PEAK_MEMORY_RESULT = %zu\n", best_train_result.peak_memory_footprint);
+  }
+#endif
+
+  return Ok;
+}
diff --git a/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp b/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
new file mode 100644
index 00000000000..2d99113488f
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingConfigureFileHandler.h"
+
+#include <cstring>
+#include <fstream>
+#include <stdexcept>
+
+namespace
+{
+
+constexpr uint16_t MAGIC_NUMBER = 29;
+constexpr uint8_t SCHEMA_VERSION = 1;
+
+void writeTrainConfigFileDataIntoBuffer(
+  const training_configure_tool::TrainConfigFileData &train_data, std::vector<char> &buffer)
+{
+  const auto &train_op_indexes_with_ranks = train_data.trainable_op_indexes_with_ranks;
+
+  // Resize to the calculated size: 8 header bytes, 2 bytes per layer position, 1 byte per rank
+  auto buffer_size = 8 + train_op_indexes_with_ranks.size() * 3;
+  buffer.resize(buffer_size);
+
+  // Point to the start of the buffer
+  char *cur_ptr = buffer.data();
+
+  // Write MAGIC_NUMBER
+  std::memcpy(cur_ptr, &MAGIC_NUMBER, sizeof(MAGIC_NUMBER));
+  cur_ptr += 2;
+
+  // Write SCHEMA_VERSION
+  std::memcpy(cur_ptr, &SCHEMA_VERSION, sizeof(SCHEMA_VERSION));
+  cur_ptr += 1;
+
+  // Skip the RESERVED field
+  cur_ptr += 1;
+
+  // Write number of layers
+  auto layers_num = static_cast<uint32_t>(train_op_indexes_with_ranks.size());
+  std::memcpy(cur_ptr, &layers_num, sizeof(layers_num));
+  cur_ptr += 4;
+
+  // Write trainable layers positions
+  for (const auto &p : train_op_indexes_with_ranks)
+  {
+    auto cur_layer_pos = p.first;
+    std::memcpy(cur_ptr, &cur_layer_pos, sizeof(cur_layer_pos));
+    cur_ptr += 2;
+  }
+  // Write the code that defines the train rank of each trainable operation
+  for (const auto &p : train_op_indexes_with_ranks)
+  {
+    const auto cur_layer_rank = static_cast<uint8_t>(p.second);
+    std::memcpy(cur_ptr, &cur_layer_rank, sizeof(cur_layer_rank));
+    cur_ptr += 1;
+  }
+}
+
+} // namespace
+
+void training_configure_tool::readDataFromFile(const std::string &filename, char *data,
+                                               size_t data_size, size_t start_position)
+{
+  std::streampos start = start_position;
+
+  std::ifstream fs(filename, std::ifstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+
+  fs.seekg(start);
+
+  if (fs.read(data, data_size).fail())
+    throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+  fs.close();
+}
+
+void training_configure_tool::writeDataToFile(const std::string &filename, const char *data,
+                                              size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+training_configure_tool::DataBuffer training_configure_tool::readFile(const char *path)
+{
+  std::ifstream file(path, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    std::string errmsg = "Failed to open file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  file.seekg(0, std::ios::end);
+  auto fileSize = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  // Allocate a buffer of the file size
+  DataBuffer model_data(fileSize);
+
+  // Read the data
+  file.read(model_data.data(), fileSize);
+  if (file.fail())
+  {
+    std::string errmsg = "Failed to read file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  return model_data;
+}
+
+onert_micro::OMStatus
+training_configure_tool::createResultFile(const TrainConfigFileData &train_data,
+                                          const char *save_path)
+{
+  std::vector<char> buffer;
+
+  writeTrainConfigFileDataIntoBuffer(train_data, buffer);
+
+  // Open or create the file
+  // Note: if the file existed, it will be overwritten
+  std::ofstream out_file(save_path, std::ios::binary | std::ios::trunc);
+  if (not out_file.is_open())
+    return onert_micro::UnknownError;
+
+  // Write data
+  out_file.write(buffer.data(), static_cast<std::streamsize>(buffer.size()));
+
+  // Close the file
+  out_file.close();
+
+  return onert_micro::Ok;
+}
+
+onert_micro::OMStatus
+training_configure_tool::createResultData(const TrainConfigFileData &train_data,
+                                          std::vector<char> &result_buffer)
+{
+  writeTrainConfigFileDataIntoBuffer(train_data, result_buffer);
+
+  return onert_micro::Ok;
+}
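So the on-disk layout produced above is: a 2-byte magic number (29), a 1-byte schema version (1), one reserved byte, a 4-byte layer count n, then n 2-byte op indexes followed by n 1-byte rank codes. For clarity, a matching decoder sketch (illustrative only, assuming a well-formed buffer written on the same host):

#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

// Illustrative decoder for the buffer produced by writeTrainConfigFileDataIntoBuffer.
std::unordered_map<uint16_t, uint8_t> parseTrainConfig(const std::vector<char> &buffer)
{
  std::unordered_map<uint16_t, uint8_t> op_index_to_rank;
  const char *ptr = buffer.data();

  uint16_t magic = 0;
  std::memcpy(&magic, ptr, sizeof(magic)); // expected to be 29
  ptr += 2;
  ptr += 1 + 1; // skip schema version + reserved byte

  uint32_t num_layers = 0;
  std::memcpy(&num_layers, ptr, sizeof(num_layers));
  ptr += 4;

  const char *ranks_ptr = ptr + 2 * num_layers; // rank codes follow all indexes
  for (uint32_t i = 0; i < num_layers; ++i)
  {
    uint16_t op_index = 0;
    std::memcpy(&op_index, ptr + 2 * i, sizeof(op_index));
    op_index_to_rank[op_index] = static_cast<uint8_t>(ranks_ptr[i]);
  }
  return op_index_to_rank;
}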
diff --git a/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp b/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp
new file mode 100644
index 00000000000..fe4a3ad8431
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingDriverHandler.h"
+#include "OMTrainingInterpreter.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <numeric>
+#include <algorithm>
+
+using namespace onert_micro;
+
+namespace
+{
+
+#define MODEL_TYPE float
+#define PRINT 0
+
+float findAverage(const std::vector<float> &values)
+{
+  auto res = std::accumulate(values.begin(), values.end(), 0.f);
+  return res / static_cast<float>(values.size());
+}
+
+} // namespace
+
+OMStatus training_configure_tool::runTrainProcessWithCurConfig(
+  OMConfig &config, const training_configure_tool::TrainData &train_data,
+  TrainResult &train_result)
+{
+  // Clear previous results
+  train_result.peak_memory_footprint = 0;
+
+  training_configure_tool::DataBuffer circle_model =
+    training_configure_tool::readFile(train_data.circle_model_path);
+  training_configure_tool::DataBuffer wof_data;
+  // If a wof file is defined, read it
+  if (train_data.wof_file_path != nullptr)
+    wof_data = training_configure_tool::readFile(train_data.wof_file_path);
+
+  // Save model size and model ptr in config
+  config.model_size = circle_model.size();
+  config.model_ptr = circle_model.data();
+
+  // If a wof file is defined, attach its buffer to the config
+  if (train_data.wof_file_path != nullptr)
+    config.wof_ptr = wof_data.data();
+
+  config.train_mode = true;
+
+  // Create training interpreter and import models
+  onert_micro::OMTrainingInterpreter train_interpreter;
+  train_interpreter.importTrainModel(config.model_ptr, config);
+
+  const auto batch_size = config.training_context.batch_size;
+  // TODO: support more inputs
+  const auto input_size = train_interpreter.getInputSizeAt(0);
+  const auto output_size = train_interpreter.getOutputSizeAt(0);
+
+  // Temporary buffers to read input data from file using BATCH_SIZE
+  float training_input[batch_size * input_size];
+  float training_target[batch_size * output_size];
+  // Note: here the test buffers are used with BATCH_SIZE = 1
+  float test_input[input_size];
+  float test_target[output_size];
+
+  // Best results
+  float max_accuracy = std::numeric_limits<float>::min();
+  float min_mse = std::numeric_limits<float>::max();
+  float min_mae = std::numeric_limits<float>::max();
+  float min_entropy = std::numeric_limits<float>::max();
+
+  const auto training_epochs = config.training_context.epochs;
+  for (uint32_t e = 0; e < training_epochs; ++e)
+  {
+#if PRINT
+    printf("Epoch: %u/%u\n", e + 1, training_epochs);
+#endif
+    std::vector<float> accuracy_v;
+    std::vector<float> cross_entropy_v;
+    std::vector<float> mse_v;
+    std::vector<float> mae_v;
+
+    // Run train for the current epoch
+    config.training_context.num_epoch = e + 1;
+    uint32_t num_steps = train_data.num_train_data_samples / batch_size;
+    for (int i = 0; i < num_steps; ++i)
+    {
+      uint32_t cur_batch_size =
+        std::min(batch_size, train_data.num_train_data_samples - batch_size * i - 1);
+      cur_batch_size = std::max(1u, cur_batch_size);
+
+      config.training_context.batch_size = cur_batch_size;
+
+      // Read current input and target data
+      training_configure_tool::readDataFromFile(train_data.input_input_train_data_path,
+                                                reinterpret_cast<char *>(training_input),
+                                                sizeof(float) * input_size * cur_batch_size,
+                                                i * sizeof(MODEL_TYPE) * input_size * batch_size);
+
+      training_configure_tool::readDataFromFile(train_data.input_target_train_data_path,
+                                                reinterpret_cast<char *>(training_target),
+                                                sizeof(float) * output_size * cur_batch_size,
+                                                i * sizeof(MODEL_TYPE) * output_size * batch_size);
+
+      // Set input and target
+      train_interpreter.setInput(reinterpret_cast<uint8_t *>(training_input), 0);
+      train_interpreter.setTarget(reinterpret_cast<uint8_t *>(training_target), 0);
+
+      // Train with the current batch size
+      train_interpreter.trainSingleStep(config);
+    }
+
+    train_interpreter.reset();
+
+    // Reset num step value
+    config.training_context.num_step = 0;
+    num_steps = train_data.num_test_data_samples;
+
+    accuracy_v.clear();
+    cross_entropy_v.clear();
+    mae_v.clear();
+    mse_v.clear();
+
+    if (train_data.metrics_to_check_best_config == NONE)
+      continue;
+
+    for (int i = 0; i < num_steps; ++i)
+    {
+      uint32_t cur_batch_size = 1;
+      training_configure_tool::readDataFromFile(
+        train_data.input_input_test_data_path, reinterpret_cast<char *>(test_input),
+        sizeof(float) * input_size * cur_batch_size, i * sizeof(MODEL_TYPE) * input_size);
+
+      training_configure_tool::readDataFromFile(
+        train_data.input_target_test_data_path, reinterpret_cast<char *>(test_target),
+        sizeof(float) * output_size * cur_batch_size, i * sizeof(MODEL_TYPE) * output_size);
+
+      train_interpreter.setInput(reinterpret_cast<uint8_t *>(test_input), 0);
+      train_interpreter.setTarget(reinterpret_cast<uint8_t *>(test_target), 0);
+
+      switch (train_data.metrics_to_check_best_config)
+      {
+        case onert_micro::CROSS_ENTROPY_METRICS:
+        {
+          float cross_entropy_metric = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::CROSS_ENTROPY_METRICS,
+                                           reinterpret_cast<void *>(&cross_entropy_metric),
+                                           cur_batch_size);
+          cross_entropy_v.push_back(cross_entropy_metric);
+        }
+        break;
+        case onert_micro::ACCURACY:
+        {
+          float accuracy = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::ACCURACY,
+                                           reinterpret_cast<void *>(&accuracy), cur_batch_size);
+          accuracy_v.push_back(accuracy);
+        }
+        break;
+        case onert_micro::MSE_METRICS:
+        {
+          float mse = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::MSE_METRICS,
+                                           reinterpret_cast<void *>(&mse), cur_batch_size);
+          mse_v.push_back(mse);
+        }
+        break;
+        case onert_micro::MAE_METRICS:
+        {
+          float mae = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::MAE_METRICS,
+                                           reinterpret_cast<void *>(&mae), cur_batch_size);
+          mae_v.push_back(mae);
+        }
+        break;
+        default:
+        {
+          assert(false && "Not supported");
+          return UnsupportedType;
+        }
+      }
+    }
+
+    // Calculate and use average values
+    switch (train_data.metrics_to_check_best_config)
+    {
+      case onert_micro::CROSS_ENTROPY_METRICS:
+      {
+        auto average_value = findAverage(cross_entropy_v);
+        if (average_value < min_entropy)
+          min_entropy = average_value;
+      }
+      break;
+      case onert_micro::ACCURACY:
+      {
+        auto average_value = findAverage(accuracy_v);
+        if (average_value > max_accuracy)
+          max_accuracy = average_value;
+      }
+      break;
+      case onert_micro::MSE_METRICS:
+      {
+        auto average_value = findAverage(mse_v);
+        if (average_value < min_mse)
+          min_mse = average_value;
+      }
+      break;
+      case onert_micro::MAE_METRICS:
+      {
+        auto average_value = findAverage(mae_v);
+        if (average_value < min_mae)
+          min_mae = average_value;
+      }
+      break;
+      default:
+      {
+        assert(false && "Not supported");
+        return UnsupportedType;
+      }
+    }
+  }
+  train_result.peak_memory_footprint = train_interpreter.getPeakFootprintMemory();
+  switch (train_data.metrics_to_check_best_config)
+  {
+    case onert_micro::CROSS_ENTROPY_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_entropy};
+    }
+    break;
+    case onert_micro::ACCURACY:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, max_accuracy};
+    }
+    break;
+    case onert_micro::MSE_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_mse};
+    }
+    break;
+    case onert_micro::MAE_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_mae};
+    }
+    break;
+    case onert_micro::NONE:
+    {
+      break;
+    }
+    default:
+    {
+      assert(false && "Not supported");
+      return UnsupportedType;
+    }
+  }
+  return Ok;
+}
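Finally, as a usage note (a sketch under assumptions, not part of the patch): the file produced by the tool is intended to be handed to the training interpreter through OMTrainingContext::training_config_info_data, the same field the tool itself fills with candidate configs during the search:

#include "OMConfig.h"
#include "TrainingConfigureFileHandler.h"

// Illustrative: attach the tool's output to a config used for on-device training.
// The buffer must outlive the training session, hence the static storage here.
void attachTrainConfig(onert_micro::OMConfig &config, const char *train_config_path)
{
  static training_configure_tool::DataBuffer config_data =
    training_configure_tool::readFile(train_config_path);
  config.training_context.training_config_info_data = config_data.data();
}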