From 1a40ee37adabf212c513f1603b50764f3f5dc82e Mon Sep 17 00:00:00 2001
From: Balyshev Artem <43214667+BalyshevArtem@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:44:03 +0300
Subject: [PATCH] [onert-micro] Introduce training configure tool (#13593)

This PR introduces the training configure tool for onert-micro.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev
---
 onert-micro/CMakeLists.txt                    |  11 +-
 .../training-configure-tool/CMakeLists.txt    |  27 ++
 .../TrainingConfigureTool.cpp                 | 143 +++++++
 .../include/SparseBackpropagationHandler.h    |  39 ++
 .../include/SparseBackpropagationHelper.h     |  61 +++
 .../TensorRankSparseBackpropagationHandler.h  |  44 ++
 .../include/TrainConfigData.h                 | 106 +++++
 .../include/TrainingConfigureFileHandler.h    |  49 +++
 .../include/TrainingDriverHandler.h           |  39 ++
 .../src/SparseBackpropagationHandler.cpp      | 170 ++++++++
 .../src/SparseBackpropagationHelper.cpp       | 394 ++++++++++++++++++
 ...TensorRankSparseBackpropagationHandler.cpp | 127 ++++++
 .../src/TrainingConfigureFileHandler.cpp      | 160 +++++++
 .../src/TrainingDriverHandler.cpp             | 269 ++++++++++++
 14 files changed, 1638 insertions(+), 1 deletion(-)
 create mode 100644 onert-micro/training-configure-tool/CMakeLists.txt
 create mode 100644 onert-micro/training-configure-tool/TrainingConfigureTool.cpp
 create mode 100644 onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
 create mode 100644 onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainConfigData.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
 create mode 100644 onert-micro/training-configure-tool/include/TrainingDriverHandler.h
 create mode 100644 onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
 create mode 100644 onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp

diff --git a/onert-micro/CMakeLists.txt b/onert-micro/CMakeLists.txt
index 7537f2955fb..c1ba692d290 100644
--- a/onert-micro/CMakeLists.txt
+++ b/onert-micro/CMakeLists.txt
@@ -174,7 +174,14 @@ endif ()
 if (DIS_DYN_SHAPES)
     message(STATUS "ONERT-MICRO will not use dynamic shapes")
     add_definitions(-DDIS_DYN_SHAPES)
-endif ()
+    list(APPEND CMAKE_ARM_OPTIONS "-DDIS_DYN_SHAPES=ON")
+endif()
+
+if (OM_MEMORY_ESTIMATE)
+    message(STATUS "ONERT-MICRO will use memory estimation")
+    add_definitions(-DOM_MEMORY_ESTIMATE)
+    list(APPEND CMAKE_ARM_OPTIONS "-DOM_MEMORY_ESTIMATE=ON")
+endif()
 
 set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
 file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
@@ -197,6 +204,7 @@ unset(KERNELS CACHE)
 unset(USE_STATIC_KERNEL CACHE)
 unset(DIS_QUANT CACHE)
 unset(DIS_FLOAT CACHE)
+unset(OM_MEMORY_ESTIMATE CACHE)
 unset(ENABLE_ONERT_MICRO_TEST CACHE)
 unset(NOT_BUILD_EXTERNALS CACHE)
 
@@ -213,6 +221,7 @@ add_custom_command(
 add_custom_target(onert_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
 
 add_subdirectory(eval-driver)
+add_subdirectory(training-configure-tool)
 
 # Should be after add_subdirectory
 unset(ENABLE_ONERT_MICRO_TRAINING CACHE)
diff --git a/onert-micro/training-configure-tool/CMakeLists.txt b/onert-micro/training-configure-tool/CMakeLists.txt
new file mode 100644
index 00000000000..41e497bffca
--- /dev/null
+++ b/onert-micro/training-configure-tool/CMakeLists.txt
@@ -0,0 +1,27 @@
+message(STATUS "START Training Config Tool")
+
+add_definitions(-DOM_MEMORY_ESTIMATE)
+
+set(TRAIN_CONFIG_TOOL_SRC
+        TrainingConfigureTool.cpp
+        src/SparseBackpropagationHandler.cpp
+        src/TensorRankSparseBackpropagationHandler.cpp
+        src/TrainingConfigureFileHandler.cpp
+        src/TrainingDriverHandler.cpp
+        src/SparseBackpropagationHelper.cpp)
+
+add_executable(train_config_tool ${TRAIN_CONFIG_TOOL_SRC})
+
+# This variable is needed to separate the standalone interpreter libraries from the libraries used in the tool
+set(CUSTOM_OM_SUFFIX "_train_config_tool")
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/onert-micro ${CMAKE_CURRENT_BINARY_DIR}/onert-micro)
+
+target_include_directories(train_config_tool PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/onert_micro/include")
+target_include_directories(train_config_tool PUBLIC "include")
+target_link_libraries(train_config_tool PUBLIC onert_micro_interpreter)
+target_include_directories(train_config_tool PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/onert_micro/include")
+target_link_libraries(train_config_tool PUBLIC onert_micro_training_interpreter)
+
+install(TARGETS train_config_tool DESTINATION bin)
+
+message(STATUS "DONE Training Config Tool")
diff --git a/onert-micro/training-configure-tool/TrainingConfigureTool.cpp b/onert-micro/training-configure-tool/TrainingConfigureTool.cpp
new file mode 100644
index 00000000000..d5ca00dfd4b
--- /dev/null
+++ b/onert-micro/training-configure-tool/TrainingConfigureTool.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "include/SparseBackpropagationHandler.h"
+#include "include/TensorRankSparseBackpropagationHandler.h"
+
+#include "TrainingDriverHandler.h"
+
+#include <iostream>
+#include <cstdlib>
+#include <stdexcept>
+
+int entry(int argc, char **argv)
+{
+  if (argc != 9 and argc != 10)
+  {
+    std::cerr << "Two variants of usage, with and without a wof file: " << argv[0]
+              << " <circle_model_path>"
+                 " optional(<wof_file_path>)"
+                 " <output_tool_file_path>"
+                 " <input_train_data_path>"
+                 " <target_train_data_path>"
+                 " <input_test_data_path>"
+                 " <target_test_data_path>"
+                 " num_of_train_smpl"
+                 " num_of_test_smpl\n";
+    return EXIT_FAILURE;
+  }
+
+  training_configure_tool::TrainData train_data;
+
+  if (argc == 10)
+  {
+    train_data.circle_model_path = argv[1];
+    train_data.wof_file_path = argv[2];
+    train_data.output_tool_file_path = argv[3];
+    train_data.input_input_train_data_path = argv[4];
+    train_data.input_target_train_data_path = argv[5];
+    train_data.input_input_test_data_path = argv[6];
+    train_data.input_target_test_data_path = argv[7];
+    train_data.num_train_data_samples = atoi(argv[8]);
+    train_data.num_test_data_samples = atoi(argv[9]);
+  }
+  else if (argc == 9)
+  {
+    train_data.circle_model_path = argv[1];
+    train_data.output_tool_file_path = argv[2];
+    train_data.input_input_train_data_path = argv[3];
+    train_data.input_target_train_data_path = argv[4];
+    train_data.input_input_test_data_path = argv[5];
+    train_data.input_target_test_data_path = argv[6];
+    train_data.num_train_data_samples = atoi(argv[7]);
+    train_data.num_test_data_samples = atoi(argv[8]);
+  }
+  else
+  {
+    throw std::runtime_error("Unknown number of command line arguments\n");
+  }
+
+  // Configure training mode
+  onert_micro::OMConfig config;
+
+  // Set user defined training settings
+  const uint32_t training_epochs = 25;
+  const float lambda = 0.001f;
+  const uint32_t BATCH_SIZE = 64;
+  const uint32_t num_train_layers = 0;
+  const onert_micro::OMLoss loss = onert_micro::CROSS_ENTROPY;
+  const onert_micro::OMTrainOptimizer train_optimizer = onert_micro::ADAM;
+  const float beta = 0.9;
+  const float beta_squares = 0.999;
+  const float epsilon = 1e-07;
+
+  config.train_mode = true;
+  {
+    onert_micro::OMTrainingContext train_context;
+    train_context.batch_size = BATCH_SIZE;
+    train_context.num_of_train_layers = num_train_layers;
+    train_context.learning_rate = lambda;
+    train_context.loss = loss;
+    train_context.optimizer = train_optimizer;
+    train_context.beta = beta;
+    train_context.beta_squares = beta_squares;
+    train_context.epsilon = epsilon;
+    train_context.epochs = training_epochs;
+
+    config.training_context = train_context;
+  }
+
+  train_data.metrics_to_check_best_config = onert_micro::CROSS_ENTROPY_METRICS;
+  train_data.memory_above_restriction = 300000;
+  train_data.acceptable_diff = 0.02;
+
+  // Find the best sparse backpropagation configuration
+  std::unordered_set<uint16_t> best_trainable_op_indexes;
+  training_configure_tool::findBestTrainableOpIndexes(config, train_data,
+                                                      best_trainable_op_indexes);
+
+  // Find the best train tensors ranks
+  training_configure_tool::TrainConfigFileData config_result;
+  auto res = training_configure_tool::findBestSparseBackpropagationTensorsRanks(
+    config, train_data, best_trainable_op_indexes, config_result.trainable_op_indexes_with_ranks);
+
+  // Save the result into a file
+  assert(!config_result.trainable_op_indexes_with_ranks.empty());
+  training_configure_tool::createResultFile(config_result, train_data.output_tool_file_path);
+
+  return EXIT_SUCCESS;
+}
+
+int entry(int argc, char **argv);
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+  try
+  {
+    return entry(argc, argv);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "ERROR: " << e.what() << std::endl;
+  }
+
+  return 255;
+}
+#else // NDEBUG
+int main(int argc, char **argv)
+{
+  // NOTE main does not catch internal exceptions for debug build to make it easy to
+  // check the stacktrace with a debugger
+  return entry(argc, argv);
+}
+#endif // !NDEBUG
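For reference, a hypothetical invocation of the 9-argument variant looks like this (the file names and sample counts below are made up for illustration; the 10-argument variant simply inserts the wof file path as the second argument):

    ./train_config_tool model.circle train_config.bin train_input.bin train_target.bin test_input.bin test_target.bin 1000 200

The tool first searches for the best set of trainable operators, then refines the per-operator train ranks, and finally writes the resulting train config into train_config.bin.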
diff --git a/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h b/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
new file mode 100644
index 00000000000..3e03777480d
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/SparseBackpropagationHandler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <unordered_set>
+
+namespace training_configure_tool
+{
+
+/*
+ * Method to find the set of operator indexes that is the most trainable, that is, the set which
+ * achieves the best metric result.
+ */
+onert_micro::OMStatus
+findBestTrainableOpIndexes(onert_micro::OMConfig &config, TrainData &train_data,
+                           std::unordered_set<uint16_t> &best_trainable_op_indexes);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HANDLER
diff --git a/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h b/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
new file mode 100644
index 00000000000..3c355dca3f0
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/SparseBackpropagationHelper.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+
+#include <unordered_set>
+#include <vector>
+
+namespace training_configure_tool
+{
+
+// Check whether the left train result is better than the right one in terms of metric result and
+// memory consumption.
+// acceptable_diff - the acceptable difference in metric values used to pick the result with the
+// better memory footprint.
+bool cmpTrainResults(const training_configure_tool::TrainResult &left,
+                     const training_configure_tool::TrainResult &right,
+                     const float acceptable_diff);
+
+// Find all trainable op indexes in the model - initial_train_op_indexes
+std::unordered_set<uint16_t> findAllTrainableOps(const char *circle_model_path);
+
+// Generate all possible sets from initial_train_op_indexes
+std::vector<std::unordered_set<uint16_t>>
+generateAllPossibleOpIndexesSets(const std::unordered_set<uint16_t> &initial_train_op_indexes);
+
+// Remove operation indexes sets with a peak memory footprint greater than the given restriction:
+// 1 - Run the train interpreter with all these sets with a single train sample and a single test
+//     sample to obtain an approximate peak memory footprint for each set.
+// 2 - Cut according to the max peak memory.
+std::vector<std::unordered_set<uint16_t>> selectOpIndexesSetsAccordingToMemoryRestriction(
+  const std::vector<std::unordered_set<uint16_t>> &op_indexes_sets, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data);
+
+// Find all combinations of train ranks for the currently selected op indexes.
+// Returns a vector of all possible combinations of train rank for every op.
+std::vector<std::unordered_map<uint16_t, OpTrainableRank>>
+findAllTensorsRanksCombinations(const std::unordered_set<uint16_t> &selected_op_indexes,
+                                onert_micro::OMConfig config,
+                                training_configure_tool::TrainData train_data);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_SPARSE_BACKPROPAGATION_HELPER
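To make the helper contracts above concrete: generateAllPossibleOpIndexesSets enumerates the power set (minus the empty set) of the trainable operator indexes, so n trainable operators produce 2^n - 1 candidate sets. A minimal standalone sketch of that include/exclude recursion (illustrative only, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

static void enumerate(const std::vector<uint16_t> &ops, size_t pos, std::set<uint16_t> &cur,
                      std::vector<std::set<uint16_t>> &out)
{
  if (pos == ops.size())
  {
    if (!cur.empty())
      out.push_back(cur); // keep only non-empty subsets
    return;
  }
  // branch 1: include ops[pos]
  cur.insert(ops[pos]);
  enumerate(ops, pos + 1, cur, out);
  // branch 2: exclude ops[pos]
  cur.erase(ops[pos]);
  enumerate(ops, pos + 1, cur, out);
}

int main()
{
  const std::vector<uint16_t> ops = {1, 3}; // two trainable operator indexes
  std::vector<std::set<uint16_t>> sets;
  std::set<uint16_t> cur;
  enumerate(ops, 0, cur, sets);
  // prints 2^2 - 1 = 3 subsets: { 1 3 }, { 1 }, { 3 }
  for (const auto &s : sets)
  {
    std::printf("{ ");
    for (auto i : s)
      std::printf("%d ", i);
    std::printf("}\n");
  }
  return 0;
}

Because the candidate count grows exponentially, the memory-restriction pruning declared above also keeps the number of full training runs tractable.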
diff --git a/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h b/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
new file mode 100644
index 00000000000..c1da3a954c9
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TensorRankSparseBackpropagationHandler.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
+
+#include "OMStatus.h"
+#include "OMConfig.h"
+#include "TrainConfigData.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <unordered_set>
+#include <unordered_map>
+
+namespace training_configure_tool
+{
+
+/*
+ * Method to find the train ranks that are the most trainable (that is, which get the best metric
+ * result and the lowest peak memory) for every operation in the selected operator indexes.
+ * Note: a train rank is an indicator of how much data of the current operation we will train
+ * (for example, the entire operation, only the bias, only the upper half, and so on).
+ */
+onert_micro::OMStatus findBestSparseBackpropagationTensorsRanks(
+  onert_micro::OMConfig &config, TrainData &train_data,
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  std::unordered_map<uint16_t, OpTrainableRank> &best_train_ranks);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TENSOR_RANK_SPARSE_BACKPROPAGATION_HANDLER
diff --git a/onert-micro/training-configure-tool/include/TrainConfigData.h b/onert-micro/training-configure-tool/include/TrainConfigData.h
new file mode 100644
index 00000000000..5d3dff06c4d
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainConfigData.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
+
+#include "OMConfig.h"
+
+#include <unordered_map>
+#include <utility>
+#include <limits>
+#include <cassert>
+#include <cstdint>
+
+namespace training_configure_tool
+{
+
+// Enum to indicate the degree (rank) of the part of the operation we will train:
+// this is an indicator of how much data of the current operation we will train
+// (for example, the entire operation, only the bias, only the upper half, and so on)
+enum OpTrainableRank
+{
+  ALL = 0,            // 0 - Train all weights in the operation
+  ONLY_BIAS = 1,      // 1 - Train bias only in the operation
+  UP_1_2_PART = 2,    // 2 - Train the upper 1/2 part of the operation
+  LOWER_1_2_PART = 3, // 3 - Train the lower 1/2 part of the operation
+  MAX_VALUE = 4,
+  // TODO add more
+};
+
+// Information for saving the data necessary for training.
+// metrics_to_check_best_config - the metric by which the best configuration will be selected.
+// acceptable_diff - the acceptable difference in metric values when selecting the best result in
+// memory terms.
+// memory_above_restriction - the upper limit of memory usage that cannot be exceeded
+struct TrainData
+{
+  const char *circle_model_path = nullptr;
+  const char *wof_file_path = nullptr;
+  const char *output_tool_file_path = nullptr;
+  const char *input_input_train_data_path = nullptr;
+  const char *input_target_train_data_path = nullptr;
+  const char *input_input_test_data_path = nullptr;
+  const char *input_target_test_data_path = nullptr;
+  int32_t num_train_data_samples = 0;
+  int32_t num_test_data_samples = 0;
+  onert_micro::OMMetrics metrics_to_check_best_config = {};
+  float acceptable_diff = 0.01;
+  size_t memory_above_restriction = 0;
+};
+
+// Struct to save the data which will be written to the result file
+struct TrainConfigFileData
+{
+  std::unordered_map<uint16_t, OpTrainableRank> trainable_op_indexes_with_ranks;
+};
+
+// Information that is the result of training.
+// best_metrics_results - the best metric result obtained during training.
+// peak_memory_footprint - the peak memory footprint observed during training.
+struct TrainResult
+{
+  std::pair<onert_micro::OMMetrics, float> best_metrics_results = {};
+  size_t peak_memory_footprint = 0;
+
+  TrainResult() = default;
+  explicit TrainResult(TrainData train_data)
+  {
+    peak_memory_footprint = std::numeric_limits<size_t>::max();
+    switch (train_data.metrics_to_check_best_config)
+    {
+      case onert_micro::ACCURACY:
+        best_metrics_results = {onert_micro::ACCURACY, 0.f};
+        break;
+      case onert_micro::CROSS_ENTROPY_METRICS:
+        best_metrics_results = {onert_micro::CROSS_ENTROPY_METRICS,
+                                std::numeric_limits<float>::max()};
+        break;
+      case onert_micro::MSE_METRICS:
+        best_metrics_results = {onert_micro::MSE_METRICS, std::numeric_limits<float>::max()};
+        break;
+      case onert_micro::MAE_METRICS:
+        best_metrics_results = {onert_micro::MAE_METRICS, std::numeric_limits<float>::max()};
+        break;
+      default:
+        assert(false && "Unsupported type");
+        break;
+    }
+  }
+};
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAIN_CONFIG_DATA
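As a usage illustration of these structures (a sketch with made-up operator indexes; createResultFile is declared in TrainingConfigureFileHandler.h just below), this encodes "train only the bias of op 0 and the upper half of op 4" and writes it to a file:

#include "TrainConfigData.h"
#include "TrainingConfigureFileHandler.h"

int main()
{
  training_configure_tool::TrainConfigFileData data;
  data.trainable_op_indexes_with_ranks[0] = training_configure_tool::ONLY_BIAS;
  data.trainable_op_indexes_with_ranks[4] = training_configure_tool::UP_1_2_PART;

  // Writes the binary train config (magic, schema, indexes, ranks) to disk
  return training_configure_tool::createResultFile(data, "train_config.bin") == onert_micro::Ok
           ? 0
           : 1;
}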
diff --git a/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h b/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
new file mode 100644
index 00000000000..1de10e68477
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainingConfigureFileHandler.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
+
+#include "OMStatus.h"
+#include "TrainConfigData.h"
+
+#include <vector>
+#include <string>
+#include <cstddef>
+
+namespace training_configure_tool
+{
+
+using DataBuffer = std::vector<char>;
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size,
+                      size_t start_position = 0);
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size);
+
+DataBuffer readFile(const char *path);
+
+// Save train config data into a file
+onert_micro::OMStatus createResultFile(const TrainConfigFileData &train_data,
+                                       const char *save_path);
+
+// Save train config data into a buffer
+onert_micro::OMStatus createResultData(const TrainConfigFileData &train_data,
+                                       std::vector<char> &result_buffer);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_CONFIGURE_FILE_HANDLER
diff --git a/onert-micro/training-configure-tool/include/TrainingDriverHandler.h b/onert-micro/training-configure-tool/include/TrainingDriverHandler.h
new file mode 100644
index 00000000000..9205347aaac
--- /dev/null
+++ b/onert-micro/training-configure-tool/include/TrainingDriverHandler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
+#define ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
+
+#include "TrainConfigData.h"
+#include "OMConfig.h"
+#include "OMStatus.h"
+
+#include <vector>
+#include <cstdint>
+
+namespace training_configure_tool
+{
+
+// Start training with the currently set conditions and the current configuration, and save the
+// result
+onert_micro::OMStatus
+runTrainProcessWithCurConfig(onert_micro::OMConfig &config,
+                             const training_configure_tool::TrainData &train_data,
+                             TrainResult &train_result);
+
+} // namespace training_configure_tool
+
+#endif // ONERT_MICRO_TRAINING_CONFIG_TOOL_TRAINING_DRIVER_HANDLER
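A minimal sketch of how the driver handler is meant to be called (illustrative; it assumes config and train_data are filled the same way as in TrainingConfigureTool.cpp above):

#include "TrainingDriverHandler.h"

// Illustrative: run one training experiment with the given config and inspect the outcome.
void runOneExperiment(onert_micro::OMConfig &config,
                      const training_configure_tool::TrainData &train_data)
{
  training_configure_tool::TrainResult result(train_data);
  training_configure_tool::runTrainProcessWithCurConfig(config, train_data, result);
  // result.best_metrics_results holds {metric, best value over all epochs};
  // result.peak_memory_footprint holds the peak memory used by the interpreter.
}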
diff --git a/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp b/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
new file mode 100644
index 00000000000..190aef7514b
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/SparseBackpropagationHandler.cpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SparseBackpropagationHandler.h"
+#include "SparseBackpropagationHelper.h"
+#include "TrainingDriverHandler.h"
+
+#include <cstdio>
+#include <vector>
+
+#define MODEL_TYPE float
+
+#define PRINT 0
+
+using namespace onert_micro;
+
+OMStatus training_configure_tool::findBestTrainableOpIndexes(
+  OMConfig &config, training_configure_tool::TrainData &train_data,
+  std::unordered_set<uint16_t> &best_trainable_op_indexes)
+{
+  // Clear to find best values
+  best_trainable_op_indexes.clear();
+  // 1 - Find all trainable op indexes in the model - initial_train_op_indexes
+  // 2 - Generate all possible sets from initial_train_op_indexes
+  // 3 - If a memory restriction is defined, then remove operation indexes sets with a peak
+  //     memory footprint greater than the given restriction
+  // 4 - Try all found sets to find the best metrics results
+
+  // 1 - Find all trainable op indexes in the model - initial_train_op_indexes
+  std::unordered_set<uint16_t> initial_train_op_indexes =
+    training_configure_tool::findAllTrainableOps(train_data.circle_model_path);
+  assert(!initial_train_op_indexes.empty());
+  if (initial_train_op_indexes.empty())
+    return UnknownError;
+#if PRINT
+  printf("Found the following trainable indexes in the model: ");
+  for (auto i : initial_train_op_indexes)
+  {
+    printf("%d ", i);
+  }
+  printf("\n");
+#endif
+
+  // 2 - Generate all possible sets from initial_train_op_indexes
+  std::vector<std::unordered_set<uint16_t>> all_possible_train_op_indexes_sets =
+    training_configure_tool::generateAllPossibleOpIndexesSets(initial_train_op_indexes);
+  assert(all_possible_train_op_indexes_sets.empty() == false);
+  if (all_possible_train_op_indexes_sets.empty() == true)
+    return UnknownError;
+#if PRINT
+  printf("Found %zu unique trainable op indexes sets in the model:\n",
+         all_possible_train_op_indexes_sets.size());
+  for (const auto &s : all_possible_train_op_indexes_sets)
+  {
+    printf("Op indexes set = { ");
+    for (auto i : s)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+  }
+#endif
+  // Clear the initial set since it is no longer needed
+  initial_train_op_indexes.clear();
+
+  // 3 - If a memory restriction is defined, then save only the sets with peak memory less than
+  //     the restriction
+  std::vector<std::unordered_set<uint16_t>> selected_op_indexes_sets =
+    training_configure_tool::selectOpIndexesSetsAccordingToMemoryRestriction(
+      all_possible_train_op_indexes_sets, config, train_data);
+#if PRINT
+  printf("Found %zu selected op indexes sets:\n", selected_op_indexes_sets.size());
+  for (const auto &s : selected_op_indexes_sets)
+  {
+    printf("Op indexes set = { ");
+    for (auto i : s)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+  }
+#endif
+  // Clear the object that is no longer needed
+  all_possible_train_op_indexes_sets.clear();
+
+  // 4 - Try all found sets to find the best metrics results
+  // To save best values
+  TrainResult best_train_result(train_data);
+  for (const auto &index_set : selected_op_indexes_sets)
+  {
+#if PRINT
+    printf("Current checked op indexes set = { ");
+    for (auto i : index_set)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+#endif
+
+    // Construct mapping with current indexes - use default train ALL parts
+    std::unordered_map<uint16_t, OpTrainableRank> train_op_ranks;
+    for (auto index : index_set)
+    {
+      train_op_ranks[index] = ALL;
+    }
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({train_op_ranks}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result(train_data);
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+
+#if PRINT
+    printf("Found the following result:\n");
+    if (train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+    {
+      printf("CROSS_ENTROPY_METRIC = %f\n", train_result.best_metrics_results.second);
+      printf("PEAK_MEMORY_RESULT = %zu\n", train_result.peak_memory_footprint);
+    }
+#endif
+
+    // Compare with the best result so far
+    bool cmp_result = cmpTrainResults(train_result, best_train_result, train_data.acceptable_diff);
+    if (cmp_result)
+    {
+      // Current result is better
+#if PRINT
+      printf("BETTER RESULT\n");
+#endif
+      best_train_result = train_result;
+      best_trainable_op_indexes = index_set;
+    }
+  }
+
+#if PRINT
+  printf("FINISH\n");
+
+  printf("Best op indexes set = { ");
+  for (auto i : best_trainable_op_indexes)
+  {
+    printf("%d ", i);
+  }
+  printf("}\n");
+
+  printf("Found the following result:\n");
+  if (best_train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+  {
+    printf("CROSS_ENTROPY_METRIC = %f\n", best_train_result.best_metrics_results.second);
+    printf("PEAK_MEMORY_RESULT = %zu\n", best_train_result.peak_memory_footprint);
+  }
+#endif
+
+  return Ok;
+}
diff --git a/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp b/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
new file mode 100644
index 00000000000..144ea9ee2e2
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/SparseBackpropagationHelper.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SparseBackpropagationHelper.h"
+#include "TrainingConfigureFileHandler.h"
+#include "TrainingDriverHandler.h"
+#include "TrainingConfigureFileHandler.h"
+#include "core/reader/OMCircleReader.h"
+
+#include <algorithm>
+#include <cmath>
+
+#define MODEL_TYPE float
+#define PRINT 0
+
+using namespace onert_micro;
+
+namespace
+{
+
+bool isTrainableWeights(const circle::OperatorCode *opcode)
+{
+  switch (opcode->builtin_code())
+  {
+    case circle::BuiltinOperator_FULLY_CONNECTED:
+    case circle::BuiltinOperator_CONV_2D:
+      return true;
+    default:
+      return false;
+  }
+}
+
+void generateRankCombinations(
+  const std::unordered_map<uint16_t, std::unordered_set<uint16_t>> &index_to_possible_ranks,
+  const std::vector<uint16_t> &indices, size_t currentIndex,
+  std::vector<uint16_t> &currentCombination, std::vector<std::vector<uint16_t>> &result)
+{
+  if (currentIndex == indices.size())
+  {
+    result.push_back(currentCombination);
+    return;
+  }
+  uint16_t index = indices[currentIndex];
+  for (uint16_t rank : index_to_possible_ranks.at(index))
+  {
+    currentCombination.push_back(rank);
+    generateRankCombinations(index_to_possible_ranks, indices, currentIndex + 1,
+                             currentCombination, result);
+    currentCombination.pop_back();
+  }
+}
+
+// Find pairs: selected op indexes - divided dim max rank value
+std::unordered_map<uint16_t, uint32_t> findTrainableTensorsMaxDivideRankAccordingToOperatorIndex(
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  const onert_micro::core::reader::OMCircleReader &reader)
+{
+  std::unordered_map<uint16_t, uint32_t> operator_index_to_tensor_max_divide_rank;
+
+  // Read ops
+  auto operators = reader.operators();
+  assert(operators != nullptr);
+
+  auto op_size = operators->size();
+
+  // Obtain operation codes
+  auto op_codes = reader.opcodes();
+
+  const auto tensors = reader.tensors();
+
+  // Go over selected best op indexes
+  for (auto op_index : selected_op_indexes)
+  {
+    auto cur_op = operators->operator[](op_index);
+
+    // Get opcode index
+    uint32_t cur_opcode_index = cur_op->opcode_index();
+    assert(cur_opcode_index < op_codes->size());
+
+    const auto opcode = op_codes->operator[](cur_opcode_index);
+
+    const auto inputs_tensors = cur_op->inputs();
+
+    uint32_t tensor_divided_dim_value = 0;
+
+    switch (opcode->builtin_code())
+    {
+      case circle::BuiltinOperator_FULLY_CONNECTED:
+      case circle::BuiltinOperator_CONV_2D:
+      {
+        assert(inputs_tensors->size() >= 2);
+        auto tensor_index = inputs_tensors->operator[](1);
+        assert(tensor_index != -1);
+        assert(tensor_index < tensors->size());
+        auto tensor = tensors->operator[](tensor_index);
+
+        // For FC and Conv2D ops the tool provides the rank over the 0th dimension
+        tensor_divided_dim_value = tensor->shape()->operator[](0);
+
+        break;
+      }
+      default:
+        assert(false && "Unsupported type");
+        tensor_divided_dim_value = 0; // Not supported
+    }
+
+    assert(tensor_divided_dim_value != 0);
+    operator_index_to_tensor_max_divide_rank[op_index] = tensor_divided_dim_value;
+  }
+
+  return operator_index_to_tensor_max_divide_rank;
+}
+
+void recursiveGenerateAllPossibleOpIndexesSetsHelper(
+  std::vector<std::unordered_set<uint16_t>> &result, std::unordered_set<uint16_t> &cur_set,
+  std::unordered_set<uint16_t>::const_iterator cur_it_set_value,
+  std::unordered_set<uint16_t>::const_iterator &end_it_set)
+{
+  // If we reach the end of the initial set, then finish
+  if (cur_it_set_value == end_it_set)
+  {
+    // If the set is not empty, add it to the final result
+    if (cur_set.empty() == false)
+      result.push_back(cur_set);
+    return;
+  }
+
+  // Add value to the current set
+  uint16_t cur_index = *cur_it_set_value;
+  cur_set.insert(cur_index);
+  // Run further and move the iterator to the next position
+  cur_it_set_value++;
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, cur_it_set_value, end_it_set);
+  // Remove the current index from the set
+  cur_set.erase(cur_index);
+  // Run the recursion again, now without the current index
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, cur_it_set_value, end_it_set);
+}
+
+} // namespace
+
+// Check whether the left train result is better than the right one
+bool training_configure_tool::cmpTrainResults(const training_configure_tool::TrainResult &left,
+                                              const training_configure_tool::TrainResult &right,
+                                              const float acceptable_diff)
+{
+  // Metrics should be the same
+  assert(left.best_metrics_results.first == right.best_metrics_results.first);
+  OMMetrics metric = left.best_metrics_results.first;
+  float left_metric_res = left.best_metrics_results.second;
+  float right_metric_res = right.best_metrics_results.second;
+
+  bool is_in_acceptable_diff = std::abs(left_metric_res - right_metric_res) <= acceptable_diff;
+  if (is_in_acceptable_diff)
+  {
+    return left.peak_memory_footprint < right.peak_memory_footprint;
+  }
+
+  switch (metric)
+  {
+    case onert_micro::ACCURACY:
+    {
+      return left.best_metrics_results.second > right.best_metrics_results.second;
+    }
+    break;
+    case onert_micro::CROSS_ENTROPY_METRICS:
+    case onert_micro::MSE_METRICS:
+    case onert_micro::MAE_METRICS:
+    {
+      return left.best_metrics_results.second < right.best_metrics_results.second;
+    }
+    break;
+    default:
+      assert(false && "Unsupported type");
+      break;
+  }
+  return true;
+}
+
+// Remove operation indexes sets with a peak memory footprint greater than the given restriction:
+// 1 - Run the train interpreter with all these sets with a single train sample and a single test
+//     sample to obtain an approximate peak memory footprint for each set
+// 2 - Cut according to the max peak memory
+std::vector<std::unordered_set<uint16_t>>
+training_configure_tool::selectOpIndexesSetsAccordingToMemoryRestriction(
+  const std::vector<std::unordered_set<uint16_t>> &op_indexes_sets, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data)
+{
+  // If it is 0, then the restriction is not set
+  if (train_data.memory_above_restriction == 0)
+  {
+    return op_indexes_sets;
+  }
+
+  std::vector<std::unordered_set<uint16_t>> result;
+
+  // To obtain a realistic estimation, batch_size = 2 and num_train_data_samples = 4 are enough
+  // Change config train and test sample values
+  train_data.num_test_data_samples = 0;
+  train_data.num_train_data_samples = std::min(4, train_data.num_train_data_samples);
+  // To disable tests
+  train_data.metrics_to_check_best_config = NONE;
+  // Set the number of epochs to one and clamp the batch size
+  config.training_context.epochs = 1;
+  config.training_context.batch_size = std::min(2u, config.training_context.batch_size);
+
+  for (const auto &op_indexes_set : op_indexes_sets)
+  {
+#if PRINT
+    printf("Start checking: { ");
+    for (auto i : op_indexes_set)
+    {
+      printf("%d ", i);
+    }
+    printf("}\n");
+#endif
+    // Construct mapping with current indexes - use default train ALL parts
+    std::unordered_map<uint16_t, OpTrainableRank> train_op_ranks;
+    for (auto index : op_indexes_set)
+    {
+      train_op_ranks[index] = ALL;
+    }
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({train_op_ranks}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result;
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+#if PRINT
+    printf("CURRENT MEMORY PEAK = %zu\n", train_result.peak_memory_footprint);
+#endif
+    if (train_result.peak_memory_footprint < train_data.memory_above_restriction)
+    {
+#if PRINT
+      printf("Added to the result\n");
+#endif
+      result.push_back(op_indexes_set);
+    }
+  }
+
+  return result;
+}
+
+// Generate all possible sets from initial_train_op_indexes
+std::vector<std::unordered_set<uint16_t>>
+training_configure_tool::generateAllPossibleOpIndexesSets(
+  const std::unordered_set<uint16_t> &initial_train_op_indexes)
+{
+  std::vector<std::unordered_set<uint16_t>> result;
+  std::unordered_set<uint16_t> cur_set;
+
+  auto begin_it = initial_train_op_indexes.begin();
+  auto end_it = initial_train_op_indexes.end();
+  recursiveGenerateAllPossibleOpIndexesSetsHelper(result, cur_set, begin_it, end_it);
+
+  return result;
+}
+
+// Find all trainable op indexes in the model - initial_train_op_indexes
+std::unordered_set<uint16_t>
+training_configure_tool::findAllTrainableOps(const char *circle_model_path)
+{
+  std::unordered_set<uint16_t> result;
+
+  training_configure_tool::DataBuffer model_ptr =
+    training_configure_tool::readFile(circle_model_path);
+
+  // Init reader
+  OMStatus status = Ok;
+  core::reader::OMCircleReader reader;
+  assert(model_ptr.data() != nullptr);
+  status = reader.parse(model_ptr.data());
+  assert(status == Ok);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+  // TODO: support multi subgraph models
+  status = reader.select_subgraph(0);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+
+  // Read ops
+  auto operators = reader.operators();
+  assert(operators != nullptr);
+
+  auto op_size = operators->size();
+
+  // Obtain operation codes
+  auto op_codes = reader.opcodes();
+
+  // Run through all ops
+  for (uint32_t i = 0; i < op_size; ++i)
+  {
+    auto cur_op = operators->operator[](i);
+
+    // Get opcode index
+    uint32_t cur_opcode_index = cur_op->opcode_index();
+    assert(cur_opcode_index < op_codes->size());
+
+    const auto opcode = op_codes->operator[](cur_opcode_index);
+
+    // If the op is trainable - insert it
+    if (isTrainableWeights(opcode))
+      result.insert(static_cast<uint16_t>(i));
+  }
+
+  return result;
+}
+
+std::vector<std::unordered_map<uint16_t, OpTrainableRank>>
+training_configure_tool::findAllTensorsRanksCombinations(
+  const std::unordered_set<uint16_t> &selected_op_indexes, onert_micro::OMConfig config,
+  training_configure_tool::TrainData train_data)
+{
+  // 1 - Find pairs: selected op indexes - divided dim max rank value
+  // 2 - Find for every tensor index every possible rank according to its opcode and size
+  // 3 - Get the result
+  std::vector<std::unordered_map<uint16_t, OpTrainableRank>> result;
+
+  training_configure_tool::DataBuffer model_ptr =
+    training_configure_tool::readFile(train_data.circle_model_path);
+
+  // Init reader
+  OMStatus status = Ok;
+  core::reader::OMCircleReader reader;
+  assert(model_ptr.data() != nullptr);
+  status = reader.parse(model_ptr.data());
+  assert(status == Ok);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+  // TODO: support multi subgraph models
+  status = reader.select_subgraph(0);
+  // Return an empty set on failure
+  if (status != Ok)
+    return result;
+
+  // 1 - Find pairs: selected op indexes - divided dim max rank value
+  std::unordered_map<uint16_t, uint32_t> operator_index_to_tensor_index =
+    findTrainableTensorsMaxDivideRankAccordingToOperatorIndex(selected_op_indexes, reader);
+  assert(operator_index_to_tensor_index.size() == selected_op_indexes.size());
+  // 2 - Find for every tensor index every possible rank according to its opcode and size
+  std::unordered_map<uint16_t, std::unordered_set<uint16_t>> op_index_to_all_possible_ranks;
+  for (auto &p : operator_index_to_tensor_index)
+  {
+    const auto op_index = p.first;
+    const auto max_value = p.second;
+
+    uint16_t cur_value = 2;
+    op_index_to_all_possible_ranks[op_index] = {ALL, ONLY_BIAS};
+    while (cur_value < uint32_t(OpTrainableRank::MAX_VALUE) and cur_value <= max_value)
+    {
+      auto new_value = cur_value * 2;
+      while (cur_value < uint16_t(OpTrainableRank::MAX_VALUE) and cur_value < new_value)
+      {
+        op_index_to_all_possible_ranks[op_index].insert(cur_value);
+        cur_value++;
+      }
+    }
+  }
+  // Get all op indices
+  std::vector<uint16_t> indices(selected_op_indexes.begin(), selected_op_indexes.end());
+  std::vector<std::vector<uint16_t>> rank_combinations;
+  std::vector<uint16_t> cur_v;
+  generateRankCombinations(op_index_to_all_possible_ranks, indices, 0, cur_v, rank_combinations);
+
+  for (const auto &ranks : rank_combinations)
+  {
+    std::unordered_map<uint16_t, OpTrainableRank> combination;
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+      combination[indices[i]] = OpTrainableRank(ranks[i]);
+    }
+    result.push_back(std::move(combination));
+  }
+
+  return result;
+}
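A worked example of the enumeration above (numbers made up): for selected ops {2, 7} whose weight tensors have a first dimension of at least 2, each op gets the candidate rank codes {0, 1, 2, 3} (ALL, ONLY_BIAS, UP_1_2_PART, LOWER_1_2_PART), and findAllTensorsRanksCombinations returns the cartesian product of 4 x 4 = 16 maps, from { 2 : ALL, 7 : ALL } through { 2 : LOWER_1_2_PART, 7 : LOWER_1_2_PART }. Each map is then serialized via createResultData and scored with a full train/evaluate cycle by the rank handler in the next file.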
diff --git a/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp b/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
new file mode 100644
index 00000000000..b03ee8164a7
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TensorRankSparseBackpropagationHandler.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorRankSparseBackpropagationHandler.h"
+#include "SparseBackpropagationHelper.h"
+#include "TrainingDriverHandler.h"
+
+#include <cstdio>
+#include <vector>
+
+#define MODEL_TYPE float
+
+#define PRINT 0
+
+using namespace onert_micro;
+
+namespace
+{
+
+} // namespace
+
+OMStatus training_configure_tool::findBestSparseBackpropagationTensorsRanks(
+  onert_micro::OMConfig &config, TrainData &train_data,
+  const std::unordered_set<uint16_t> &selected_op_indexes,
+  std::unordered_map<uint16_t, OpTrainableRank> &best_train_ranks)
+{
+  // Clear to find best values
+  best_train_ranks.clear();
+
+  // 1 - Find all combinations of ranks for the currently selected op indexes
+  // 2 - Run all of them to find the best variant
+
+  // 1 - Find all combinations of ranks for the currently selected op indexes
+  std::vector<std::unordered_map<uint16_t, OpTrainableRank>> all_combinations =
+    findAllTensorsRanksCombinations(selected_op_indexes, config, train_data);
+
+#if PRINT
+  printf("All combinations: op_index : rank_value; { \n");
+  for (const auto &combination : all_combinations)
+  {
+    for (auto &p : combination)
+    {
+      printf("(%d : %d); ", p.first, p.second);
+    }
+    printf("\n");
+  }
+  printf("}\n");
+
+#endif // PRINT
+
+  // 2 - Run all of them to find the best variant
+  TrainResult best_train_result(train_data);
+  for (const auto &combination : all_combinations)
+  {
+#if PRINT
+    printf("Current checked combination: op_index : rank_value; { ");
+    for (auto &p : combination)
+    {
+      printf("(%d : %d); ", p.first, p.second);
+    }
+    printf("}\n");
+#endif
+
+    std::vector<char> tmp_buffer;
+    // Create data with current buffer information
+    createResultData({combination}, tmp_buffer);
+    config.training_context.training_config_info_data = tmp_buffer.data();
+
+    TrainResult train_result(train_data);
+    // Run train with this information
+    runTrainProcessWithCurConfig(config, train_data, train_result);
+
+#if PRINT
+    printf("Found the following result:\n");
+    if (train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+    {
+      printf("CROSS_ENTROPY_METRIC = %f\n", train_result.best_metrics_results.second);
+      printf("PEAK_MEMORY_RESULT = %zu\n", train_result.peak_memory_footprint);
+    }
+#endif
+
+    // Compare with the best result so far
+    bool cmp_result = cmpTrainResults(train_result, best_train_result, train_data.acceptable_diff);
+    if (cmp_result)
+    {
+      // Current result is better
+#if PRINT
+      printf("BETTER RESULT\n");
+#endif
+      best_train_result = train_result;
+      best_train_ranks = combination;
+    }
+  }
+
+#if PRINT
+  printf("FINISH\n");
+
+  printf("Best rank combination: op_index : rank_value; { ");
+  for (auto &p : best_train_ranks)
+  {
+    printf("(%d : %d); ", p.first, p.second);
+  }
+  printf("}\n");
+
+  printf("Found the following result:\n");
+  if (best_train_result.best_metrics_results.first == CROSS_ENTROPY_METRICS)
+  {
+    printf("CROSS_ENTROPY_METRIC = %f\n", best_train_result.best_metrics_results.second);
+    printf("PEAK_MEMORY_RESULT = %zu\n", best_train_result.peak_memory_footprint);
+  }
+#endif
+
+  return Ok;
+}
diff --git a/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp b/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
new file mode 100644
index 00000000000..2d99113488f
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TrainingConfigureFileHandler.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingConfigureFileHandler.h"
+
+#include <cstring>
+#include <fstream>
+#include <stdexcept>
+
+namespace
+{
+
+constexpr uint16_t MAGIC_NUMBER = 29;
+constexpr uint8_t SCHEMA_VERSION = 1;
+
+void writeTrainConfigFileDataIntoBuffer(
+  const training_configure_tool::TrainConfigFileData &train_data, std::vector<char> &buffer)
+{
+  const auto &train_op_indexes_with_ranks = train_data.trainable_op_indexes_with_ranks;
+
+  // Resize to the calculated size: 8 header bytes, 2 bytes per layer position, 1 byte per rank
+  auto buffer_size = 8 + train_op_indexes_with_ranks.size() * 3;
+  buffer.resize(buffer_size);
+
+  // Point to the start of the buffer
+  char *cur_ptr = buffer.data();
+
+  // Write MAGIC_NUMBER
+  std::memcpy(cur_ptr, &MAGIC_NUMBER, sizeof(MAGIC_NUMBER));
+  cur_ptr += 2;
+
+  // Write SCHEMA_VERSION
+  std::memcpy(cur_ptr, &SCHEMA_VERSION, sizeof(SCHEMA_VERSION));
+  cur_ptr += 1;
+
+  // Skip the RESERVED field
+  cur_ptr += 1;
+
+  // Write number of layers
+  auto layers_num = static_cast<uint32_t>(train_op_indexes_with_ranks.size());
+  std::memcpy(cur_ptr, &layers_num, sizeof(layers_num));
+  cur_ptr += 4;
+
+  // Write trainable layers positions
+  for (const auto &p : train_op_indexes_with_ranks)
+  {
+    auto cur_layer_pos = p.first;
+    std::memcpy(cur_ptr, &cur_layer_pos, sizeof(cur_layer_pos));
+    cur_ptr += 2;
+  }
+  // Write the code that defines the train rank of each trainable operation
+  for (const auto &p : train_op_indexes_with_ranks)
+  {
+    const auto cur_layer_rank = static_cast<uint8_t>(p.second);
+    std::memcpy(cur_ptr, &cur_layer_rank, sizeof(cur_layer_rank));
+    cur_ptr += 1;
+  }
+}
+
+} // namespace
+
+void training_configure_tool::readDataFromFile(const std::string &filename, char *data,
+                                               size_t data_size, size_t start_position)
+{
+  std::streampos start = start_position;
+
+  std::ifstream fs(filename, std::ifstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+
+  fs.seekg(start);
+
+  if (fs.read(data, data_size).fail())
+    throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+  fs.close();
+}
+
+void training_configure_tool::writeDataToFile(const std::string &filename, const char *data,
+                                              size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+training_configure_tool::DataBuffer training_configure_tool::readFile(const char *path)
+{
+  std::ifstream file(path, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    std::string errmsg = "Failed to open file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  file.seekg(0, std::ios::end);
+  auto fileSize = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  // Allocate a buffer of the file size
+  DataBuffer model_data(fileSize);
+
+  // Read the data
+  file.read(model_data.data(), fileSize);
+  if (file.fail())
+  {
+    std::string errmsg = "Failed to read file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  return model_data;
+}
+
+onert_micro::OMStatus
+training_configure_tool::createResultFile(const TrainConfigFileData &train_data,
+                                          const char *save_path)
+{
+  std::vector<char> buffer;
+
+  writeTrainConfigFileDataIntoBuffer(train_data, buffer);
+
+  // Open or create the file
+  // Note: if the file existed, it will be overwritten
+  std::ofstream out_file(save_path, std::ios::binary | std::ios::trunc);
+  if (not out_file.is_open())
+    return onert_micro::UnknownError;
+
+  // Write data
+  out_file.write(buffer.data(), static_cast<std::streamsize>(buffer.size()));
+
+  // Close the file
+  out_file.close();
+
+  return onert_micro::Ok;
+}
+
+onert_micro::OMStatus
+training_configure_tool::createResultData(const TrainConfigFileData &train_data,
+                                          std::vector<char> &result_buffer)
+{
+  writeTrainConfigFileDataIntoBuffer(train_data, result_buffer);
+
+  return onert_micro::Ok;
+}
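So the on-disk layout produced above is: a 2-byte magic number (29), a 1-byte schema version (1), one reserved byte, a 4-byte layer count n, then n 2-byte op indexes followed by n 1-byte rank codes. For clarity, a matching decoder sketch (illustrative only, assuming a well-formed buffer written on the same host):

#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

// Illustrative decoder for the buffer produced by writeTrainConfigFileDataIntoBuffer.
std::unordered_map<uint16_t, uint8_t> parseTrainConfig(const std::vector<char> &buffer)
{
  std::unordered_map<uint16_t, uint8_t> op_index_to_rank;
  const char *ptr = buffer.data();

  uint16_t magic = 0;
  std::memcpy(&magic, ptr, sizeof(magic)); // expected to be 29
  ptr += 2;
  ptr += 1 + 1; // skip schema version + reserved byte

  uint32_t num_layers = 0;
  std::memcpy(&num_layers, ptr, sizeof(num_layers));
  ptr += 4;

  const char *ranks_ptr = ptr + 2 * num_layers; // rank codes follow all indexes
  for (uint32_t i = 0; i < num_layers; ++i)
  {
    uint16_t op_index = 0;
    std::memcpy(&op_index, ptr + 2 * i, sizeof(op_index));
    op_index_to_rank[op_index] = static_cast<uint8_t>(ranks_ptr[i]);
  }
  return op_index_to_rank;
}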
diff --git a/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp b/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp
new file mode 100644
index 00000000000..fe4a3ad8431
--- /dev/null
+++ b/onert-micro/training-configure-tool/src/TrainingDriverHandler.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingDriverHandler.h"
+#include "OMTrainingInterpreter.h"
+#include "TrainingConfigureFileHandler.h"
+
+#include <numeric>
+#include <algorithm>
+
+using namespace onert_micro;
+
+namespace
+{
+
+#define MODEL_TYPE float
+#define PRINT 0
+
+float findAverage(const std::vector<float> &values)
+{
+  auto res = std::accumulate(values.begin(), values.end(), 0.f);
+  return res / static_cast<float>(values.size());
+}
+
+} // namespace
+
+OMStatus training_configure_tool::runTrainProcessWithCurConfig(
+  OMConfig &config, const training_configure_tool::TrainData &train_data,
+  TrainResult &train_result)
+{
+  // Clear previous results
+  train_result.peak_memory_footprint = 0;
+
+  training_configure_tool::DataBuffer circle_model =
+    training_configure_tool::readFile(train_data.circle_model_path);
+  training_configure_tool::DataBuffer wof_data;
+  // If a wof file is defined, read it
+  if (train_data.wof_file_path != nullptr)
+    wof_data = training_configure_tool::readFile(train_data.wof_file_path);
+
+  // Save model size and model ptr in config
+  config.model_size = circle_model.size();
+  config.model_ptr = circle_model.data();
+
+  // If a wof file is defined, attach its buffer to the config
+  if (train_data.wof_file_path != nullptr)
+    config.wof_ptr = wof_data.data();
+
+  config.train_mode = true;
+
+  // Create training interpreter and import models
+  onert_micro::OMTrainingInterpreter train_interpreter;
+  train_interpreter.importTrainModel(config.model_ptr, config);
+
+  const auto batch_size = config.training_context.batch_size;
+  // TODO: support more inputs
+  const auto input_size = train_interpreter.getInputSizeAt(0);
+  const auto output_size = train_interpreter.getOutputSizeAt(0);
+
+  // Temporary buffers to read input data from file using BATCH_SIZE
+  float training_input[batch_size * input_size];
+  float training_target[batch_size * output_size];
+  // Note: here the test buffers are used with BATCH_SIZE = 1
+  float test_input[input_size];
+  float test_target[output_size];
+
+  // Best results
+  float max_accuracy = std::numeric_limits<float>::min();
+  float min_mse = std::numeric_limits<float>::max();
+  float min_mae = std::numeric_limits<float>::max();
+  float min_entropy = std::numeric_limits<float>::max();
+
+  const auto training_epochs = config.training_context.epochs;
+  for (uint32_t e = 0; e < training_epochs; ++e)
+  {
+#if PRINT
+    printf("Epoch: %u/%u\n", e + 1, training_epochs);
+#endif
+    std::vector<float> accuracy_v;
+    std::vector<float> cross_entropy_v;
+    std::vector<float> mse_v;
+    std::vector<float> mae_v;
+
+    // Run train for the current epoch
+    config.training_context.num_epoch = e + 1;
+    uint32_t num_steps = train_data.num_train_data_samples / batch_size;
+    for (int i = 0; i < num_steps; ++i)
+    {
+      uint32_t cur_batch_size =
+        std::min(batch_size, train_data.num_train_data_samples - batch_size * i - 1);
+      cur_batch_size = std::max(1u, cur_batch_size);
+
+      config.training_context.batch_size = cur_batch_size;
+
+      // Read current input and target data
+      training_configure_tool::readDataFromFile(train_data.input_input_train_data_path,
+                                                reinterpret_cast<char *>(training_input),
+                                                sizeof(float) * input_size * cur_batch_size,
+                                                i * sizeof(MODEL_TYPE) * input_size * batch_size);
+
+      training_configure_tool::readDataFromFile(train_data.input_target_train_data_path,
+                                                reinterpret_cast<char *>(training_target),
+                                                sizeof(float) * output_size * cur_batch_size,
+                                                i * sizeof(MODEL_TYPE) * output_size * batch_size);
+
+      // Set input and target
+      train_interpreter.setInput(reinterpret_cast<uint8_t *>(training_input), 0);
+      train_interpreter.setTarget(reinterpret_cast<uint8_t *>(training_target), 0);
+
+      // Train with the current batch size
+      train_interpreter.trainSingleStep(config);
+    }
+
+    train_interpreter.reset();
+
+    // Reset num step value
+    config.training_context.num_step = 0;
+    num_steps = train_data.num_test_data_samples;
+
+    accuracy_v.clear();
+    cross_entropy_v.clear();
+    mae_v.clear();
+    mse_v.clear();
+
+    if (train_data.metrics_to_check_best_config == NONE)
+      continue;
+
+    for (int i = 0; i < num_steps; ++i)
+    {
+      uint32_t cur_batch_size = 1;
+      training_configure_tool::readDataFromFile(
+        train_data.input_input_test_data_path, reinterpret_cast<char *>(test_input),
+        sizeof(float) * input_size * cur_batch_size, i * sizeof(MODEL_TYPE) * input_size);
+
+      training_configure_tool::readDataFromFile(
+        train_data.input_target_test_data_path, reinterpret_cast<char *>(test_target),
+        sizeof(float) * output_size * cur_batch_size, i * sizeof(MODEL_TYPE) * output_size);
+
+      train_interpreter.setInput(reinterpret_cast<uint8_t *>(test_input), 0);
+      train_interpreter.setTarget(reinterpret_cast<uint8_t *>(test_target), 0);
+
+      switch (train_data.metrics_to_check_best_config)
+      {
+        case onert_micro::CROSS_ENTROPY_METRICS:
+        {
+          float cross_entropy_metric = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::CROSS_ENTROPY_METRICS,
+                                           reinterpret_cast<void *>(&cross_entropy_metric),
+                                           cur_batch_size);
+          cross_entropy_v.push_back(cross_entropy_metric);
+        }
+        break;
+        case onert_micro::ACCURACY:
+        {
+          float accuracy = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::ACCURACY,
+                                           reinterpret_cast<void *>(&accuracy), cur_batch_size);
+          accuracy_v.push_back(accuracy);
+        }
+        break;
+        case onert_micro::MSE_METRICS:
+        {
+          float mse = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::MSE_METRICS,
+                                           reinterpret_cast<void *>(&mse), cur_batch_size);
+          mse_v.push_back(mse);
+        }
+        break;
+        case onert_micro::MAE_METRICS:
+        {
+          float mae = 0.f;
+          train_interpreter.evaluateMetric(onert_micro::MAE_METRICS,
+                                           reinterpret_cast<void *>(&mae), cur_batch_size);
+          mae_v.push_back(mae);
+        }
+        break;
+        default:
+        {
+          assert(false && "Not supported");
+          return UnsupportedType;
+        }
+      }
+    }
+
+    // Calculate and use average values
+    switch (train_data.metrics_to_check_best_config)
+    {
+      case onert_micro::CROSS_ENTROPY_METRICS:
+      {
+        auto average_value = findAverage(cross_entropy_v);
+        if (average_value < min_entropy)
+          min_entropy = average_value;
+      }
+      break;
+      case onert_micro::ACCURACY:
+      {
+        auto average_value = findAverage(accuracy_v);
+        if (average_value > max_accuracy)
+          max_accuracy = average_value;
+      }
+      break;
+      case onert_micro::MSE_METRICS:
+      {
+        auto average_value = findAverage(mse_v);
+        if (average_value < min_mse)
+          min_mse = average_value;
+      }
+      break;
+      case onert_micro::MAE_METRICS:
+      {
+        auto average_value = findAverage(mae_v);
+        if (average_value < min_mae)
+          min_mae = average_value;
+      }
+      break;
+      default:
+      {
+        assert(false && "Not supported");
+        return UnsupportedType;
+      }
+    }
+  }
+  train_result.peak_memory_footprint = train_interpreter.getPeakFootprintMemory();
+  switch (train_data.metrics_to_check_best_config)
+  {
+    case onert_micro::CROSS_ENTROPY_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_entropy};
+    }
+    break;
+    case onert_micro::ACCURACY:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, max_accuracy};
+    }
+    break;
+    case onert_micro::MSE_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_mse};
+    }
+    break;
+    case onert_micro::MAE_METRICS:
+    {
+      train_result.best_metrics_results = {train_data.metrics_to_check_best_config, min_mae};
+    }
+    break;
+    case onert_micro::NONE:
+    {
+      break;
+    }
+    default:
+    {
+      assert(false && "Not supported");
+      return UnsupportedType;
+    }
+  }
+  return Ok;
+}
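Finally, as a usage note (a sketch under assumptions, not part of the patch): the file produced by the tool is intended to be handed to the training interpreter through OMTrainingContext::training_config_info_data, the same field the tool itself fills with candidate configs during the search:

#include "OMConfig.h"
#include "TrainingConfigureFileHandler.h"

// Illustrative: attach the tool's output to a config used for on-device training.
// The buffer must outlive the training session, hence the static storage here.
void attachTrainConfig(onert_micro::OMConfig &config, const char *train_config_path)
{
  static training_configure_tool::DataBuffer config_data =
    training_configure_tool::readFile(train_config_path);
  config.training_context.training_config_info_data = config_data.data();
}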