diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ad346..1b2bb95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,9 +99,11 @@ if(WIN32) else() if(APPLE) add_definitions("-DBUILD_OSX") + add_definitions("-DBUILD_POSIX") else() if(UNIX) add_definitions("-DBUILD_LINUX") + add_definitions("-DBUILD_POSIX") else() message(STATUS "Unsupported OS, good luck!") endif() @@ -129,7 +131,12 @@ if(CN24_BUILD_SQLITE3) endif() include_directories(${CN24_INC}) -add_library(cn24 STATIC SHARED ${CN24_SOURCES} ${CN24_HEADERS}) + +if(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + add_library(cn24 STATIC ${CN24_SOURCES} ${CN24_HEADERS}) +else() + add_library(cn24 STATIC SHARED ${CN24_SOURCES} ${CN24_HEADERS}) +endif() # And now for some dependencies set(CN24_BUILD_PNG ON CACHE BOOL "Build CN24 with libpng support") diff --git a/include/cn24/util/Dataset.h b/include/cn24/util/Dataset.h index e3f308b..539c974 100644 --- a/include/cn24/util/Dataset.h +++ b/include/cn24/util/Dataset.h @@ -161,7 +161,8 @@ class TensorStreamPatchDataset : public Dataset { std::vector class_weights, unsigned int patchsize_x, unsigned int patchsize_y, - dataset_localized_error_function error_function = DefaultLocalizedErrorFunction); + dataset_localized_error_function error_function = DefaultLocalizedErrorFunction, + int training_fd = 0, int testing_fd = 0); // Dataset implementations virtual Task GetTask() const; @@ -219,7 +220,8 @@ class TensorStreamDataset : public Dataset { std::vector class_names, std::vector class_colors, std::vector class_weights, - dataset_localized_error_function error_function = DefaultLocalizedErrorFunction); + dataset_localized_error_function error_function = DefaultLocalizedErrorFunction, + int training_fd = 0, int testing_fd = 0); // Dataset implementations virtual Task GetTask() const; diff --git a/include/cn24/util/Tensor.h b/include/cn24/util/Tensor.h index 6677e43..189098c 100644 --- a/include/cn24/util/Tensor.h +++ b/include/cn24/util/Tensor.h @@ -103,7 +103,8 @@ class 
Tensor { * @brief Resizes the Tensor with data loss. */ void Resize (const std::size_t samples, const std::size_t width = 1, - const std::size_t height = 1, const std::size_t maps = 1); + const std::size_t height = 1, const std::size_t maps = 1, + datum* const preallocated_memory = nullptr, bool mmapped = false ); /** * @brief Resizes the Tensor to match another Tensor's size. @@ -151,8 +152,11 @@ class Tensor { * * Note that this resizes the stream if necessary and overwrites its content. * @param input The input stream + * @param head_only Set to true to only read the dimensions + * @param try_mmap Set to true to attempt to memory map the file + * @param fd File descriptor for the SAME file as input's underlying */ - void Deserialize (std::istream& input); + void Deserialize (std::istream& input, bool head_only = false, bool try_mmap = false, int fd = 0); /** * @brief Loads a file and resizes the Tensor to match its contents @@ -319,6 +323,12 @@ class Tensor { */ bool cl_gpu_ = false; + /** + * @brief If this is true, the Tensor was memory mapped + */ + bool mmapped_ = false; + void* original_mmap_ = nullptr; + bool hint_ignore_content_ = false; }; diff --git a/src/net/DatasetInputLayer.cpp b/src/net/DatasetInputLayer.cpp index 886ddae..815a644 100644 --- a/src/net/DatasetInputLayer.cpp +++ b/src/net/DatasetInputLayer.cpp @@ -49,6 +49,7 @@ DatasetInputLayer::DatasetInputLayer (Dataset& dataset, // Generate random permutation of the samples // First, we need an array of ascending numbers + LOGDEBUG << "Generating random permutation..." 
<< std::flush; for (unsigned int i = 0; i < elements_training_; i++) { perm_.push_back (i); } diff --git a/src/util/Tensor.cpp b/src/util/Tensor.cpp index be6d6a1..7203dbb 100644 --- a/src/util/Tensor.cpp +++ b/src/util/Tensor.cpp @@ -13,6 +13,13 @@ #include #include + +#ifdef BUILD_POSIX +#include +#include +#include +#endif + #include "PNGUtil.h" #include "JPGUtil.h" @@ -55,6 +62,10 @@ Tensor::Tensor ( const Tensor& tensor, bool intentional ) { } Tensor::Tensor ( Tensor && tensor ) { +#ifdef BUILD_OPENCL + tensor.MoveToCPU(); +#endif + data_ptr_ = tensor.data_ptr_; samples_ = tensor.samples_; maps_ = tensor.maps_; @@ -118,7 +129,7 @@ void Tensor::Shadow ( Tensor& tensor ) { void Tensor::Resize ( const std::size_t samples, const std::size_t width, - const std::size_t height, const std::size_t maps ) { + const std::size_t height, const std::size_t maps, datum* const preallocated_memory, bool mmapped) { // Check if reshaping works if ( Reshape ( samples, width, height, maps ) ) return; @@ -133,12 +144,17 @@ void Tensor::Resize ( const std::size_t samples, const std::size_t width, if ( elements == 0 ) return; - // Allocate + if(preallocated_memory != nullptr) { + data_ptr_ = preallocated_memory; + mmapped_ = mmapped; + } else { + // Allocate #ifdef BLAS_MKL - data_ptr_ = ( datum* ) MKL_malloc ( elements * sizeof ( datum ) / sizeof ( char ), 32 ); + data_ptr_ = ( datum* ) MKL_malloc ( elements * sizeof ( datum ) / sizeof ( char ), 32 ); #else - data_ptr_ = new datum[elements]; + data_ptr_ = new datum[elements]; #endif + } // Save configuration samples_ = samples; @@ -246,7 +262,7 @@ void Tensor::Serialize ( std::ostream& output, bool convert ) { } } -void Tensor::Deserialize ( std::istream& input ) { +void Tensor::Deserialize ( std::istream& input , bool head_only, bool try_mmap, int fd) { #ifdef BUILD_OPENCL MoveToCPU ( true ); #endif @@ -263,11 +279,42 @@ void Tensor::Deserialize ( std::istream& input ) { input.read ( ( char* ) &height, sizeof ( uint64_t ) / sizeof 
( char ) ); input.read ( ( char* ) &maps, sizeof ( uint64_t ) / sizeof ( char ) ); - Resize ( samples, width, height, maps ); +#ifdef BUILD_POSIX + if(!try_mmap || fd == 0) +#endif + Resize ( samples, width, height, maps ); + + std::size_t elements = samples * maps * width * height; - if ( elements_ > 0 ) - input.read ( ( char* ) data_ptr_, ( elements_ * sizeof ( datum ) ) - / sizeof ( char ) ); + if ( elements > 0 && !head_only ) { +#ifdef BUILD_POSIX + if(try_mmap && fd != 0) { + // Get page size + long int page_size = sysconf(_SC_PAGESIZE); + long int current_position = input.tellg(); + long int offset_in_page = current_position % page_size; +#ifdef BUILD_LINUX + void* target_mmap = mmap64(NULL,((elements* sizeof(datum)) / sizeof(char)) + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page); +#elif defined(BUILD_OSX) + // OS X is 64-bit by default + void* target_mmap = mmap(NULL,((elements* sizeof(datum)) / sizeof(char)) + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page); +#endif + if(target_mmap == MAP_FAILED) { + LOGERROR << "Memory map failed: " << errno; + } + original_mmap_ = target_mmap; + + target_mmap = (void*)(((long)target_mmap) + offset_in_page); + Resize(samples, width, height, maps, (datum*)target_mmap, true); + input.seekg(( elements * sizeof ( datum ) ) / sizeof ( char ) , std::ios::cur); + } else +#endif + input.read ( ( char* ) data_ptr_, ( elements * sizeof ( datum ) ) + / sizeof ( char ) ); + } + else if(head_only) + input.seekg(( elements * sizeof ( datum ) ) / sizeof ( char ) , std::ios::cur); + } @@ -346,11 +393,17 @@ bool Tensor::CopyMap ( const Tensor& source, const std::size_t source_sample, void Tensor::DeleteIfPossible() { if ( data_ptr_ != nullptr ) { if ( !is_shadow_ ) { + if(mmapped_) { + munmap((void*)original_mmap_, (elements_ * sizeof(datum)) / sizeof(char)); + original_mmap_ = nullptr; + mmapped_ = false; + } else { #ifdef BLAS_MKL - mkl_free ( data_ptr_ ); + mkl_free ( 
data_ptr_ ); #else - delete[] data_ptr_; + delete[] data_ptr_; #endif + } #ifdef BUILD_OPENCL if ( cl_data_ptr_ != 0 ) { clReleaseMemObject ( (cl_mem)cl_data_ptr_ ); diff --git a/src/util/TensorStreamDataset.cpp b/src/util/TensorStreamDataset.cpp index 7022b4b..6227b50 100644 --- a/src/util/TensorStreamDataset.cpp +++ b/src/util/TensorStreamDataset.cpp @@ -5,6 +5,12 @@ * For licensing information, see the LICENSE file included with this project. */ +#ifdef BUILD_POSIX +#include +#include +#include +#endif + #include #include @@ -25,7 +31,8 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, std::vector< std::string > class_names, std::vector class_colors, std::vector class_weights, - dataset_localized_error_function error_function) : + dataset_localized_error_function error_function, + int training_fd, int testing_fd ) : classes_ (classes), class_names_ (class_names), class_colors_ (class_colors), class_weights_(class_weights), error_function_ (error_function) { @@ -40,7 +47,7 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, Tensor tensor; while (!training_stream.eof()) { - tensor.Deserialize (training_stream); + tensor.Deserialize (training_stream, true); if (tensor.elements() == 0) break; @@ -59,7 +66,7 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, } while (!testing_stream.eof()) { - tensor.Deserialize (testing_stream); + tensor.Deserialize (testing_stream, true); if (tensor.elements() == 0) break; @@ -97,9 +104,13 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, unsigned int e = 0; max_width_ = 0; max_height_ = 0; + + if((tensor_count_training_ + tensor_count_testing_) > 0) { + LOGINFO << "Deserializing " << (tensor_count_training_ + tensor_count_testing_) / 2 << " Tensors..." 
<< std::endl << std::flush; + } for (unsigned int t = 0; t < (tensor_count_training_ / 2); t++) { - data_[t].Deserialize (training_stream); + data_[t].Deserialize (training_stream, false, true, training_fd); if (data_[t].width() > max_width_) max_width_ = data_[t].width(); @@ -107,11 +118,13 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, if (data_[t].height() > max_height_) max_height_ = data_[t].height(); - labels_[t].Deserialize (training_stream); + labels_[t].Deserialize (training_stream, false, true, training_fd); + + std::cout << "." << std::flush; } for (unsigned int t = (tensor_count_training_ / 2) ; t < tensors_; t++) { - data_[t].Deserialize (testing_stream); + data_[t].Deserialize (testing_stream, false, true, testing_fd); if (data_[t].width() > max_width_) max_width_ = data_[t].width(); @@ -119,7 +132,9 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream, if (data_[t].height() > max_height_) max_height_ = data_[t].height(); - labels_[t].Deserialize (testing_stream); + labels_[t].Deserialize (testing_stream, false, true, testing_fd); + + std::cout << "." << std::flush; } if (max_width_ & 1) @@ -308,6 +323,9 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream& dataset_localized_error_function error_function = DefaultLocalizedErrorFunction; std::string training_file; std::string testing_file; + int training_fd = 0; + int testing_fd = 0; + bool no_mmap = false; file.clear(); file.seekg (0, std::ios::beg); @@ -315,6 +333,11 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream& while (! 
file.eof()) { std::string line; std::getline (file, line); + + if (StartsWithIdentifier (line, "nommap")) { + LOGDEBUG << "Dataset requested to not be memory mapped."; + no_mmap = true; + } if (StartsWithIdentifier (line, "classes")) { ParseCountIfPossible (line, "classes", classes); @@ -388,6 +411,13 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream& if(!training_stream->good()) { FATAL("Failed to load " << training_file << "!"); } +#ifdef BUILD_POSIX + if(!no_mmap) + training_fd = open(training_file.c_str(), O_RDONLY); + if(training_fd < 0) { + FATAL("Failed to load " << training_file << "!"); + } +#endif } else { training_stream = new std::istringstream(); } @@ -397,6 +427,13 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream& if(!testing_stream->good()) { FATAL("Failed to load " << testing_file << "!"); } +#ifdef BUILD_POSIX + if(!no_mmap) + testing_fd = open(testing_file.c_str(), O_RDONLY); + if(testing_fd < 0) { + FATAL("Failed to load " << testing_file << "!"); + } +#endif } else { testing_stream = new std::istringstream(); } @@ -405,9 +442,9 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream& for (unsigned int c = 0; c < classes; c++) class_weights.push_back(1.0); } - + return new TensorStreamDataset (*training_stream, *testing_stream, classes, - class_names, class_colors, class_weights, error_function); + class_names, class_colors, class_weights, error_function, training_fd, testing_fd); } } diff --git a/src/util/TensorStreamPatchDataset.cpp b/src/util/TensorStreamPatchDataset.cpp index 859f88e..e01f7e5 100644 --- a/src/util/TensorStreamPatchDataset.cpp +++ b/src/util/TensorStreamPatchDataset.cpp @@ -5,6 +5,12 @@ * For licensing information, see the LICENSE file included with this project. 
*/ +#ifdef BUILD_POSIX +#include +#include +#include +#endif + #include #include #include @@ -30,7 +36,8 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream std::vector class_weights, unsigned int patchsize_x, unsigned int patchsize_y, - dataset_localized_error_function error_function) : + dataset_localized_error_function error_function, + int training_fd, int testing_fd ) : classes_(classes), class_names_(class_names), class_colors_(class_colors), class_weights_(class_weights), patchsize_x_(patchsize_x), patchsize_y_(patchsize_y), @@ -46,7 +53,7 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream Tensor tensor; while (!training_stream.eof()) { - tensor.Deserialize(training_stream); + tensor.Deserialize(training_stream, true); if (tensor.elements() == 0) break; @@ -65,7 +72,7 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream } while (!testing_stream.eof()) { - tensor.Deserialize(testing_stream); + tensor.Deserialize(testing_stream, true); if (tensor.elements() == 0) break; @@ -104,9 +111,13 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream // Read tensors unsigned int e = 0; + + if((tensor_count_training_ + tensor_count_testing_) > 0) { + LOGINFO << "Deserializing " << (tensor_count_training_ + tensor_count_testing_) / 2 << " Tensors..." 
<< std::endl << std::flush; + } for (unsigned int t = 0; t < (tensor_count_training_ / 2); t++) { - data_[t].Deserialize(training_stream); + data_[t].Deserialize(training_stream, false, true, training_fd); unsigned int inner_width = data_[t].width() - (patchsize_x_ - 1); unsigned int inner_height = data_[t].height() - (patchsize_y_ - 1); @@ -118,11 +129,13 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream sample_count_training_ += inner_width * inner_height; - labels_[t].Deserialize(training_stream); + labels_[t].Deserialize(training_stream, false, true, training_fd); + + std::cout << "." << std::flush; } for (unsigned int t = (tensor_count_training_ / 2); t < tensors_; t++) { - data_[t].Deserialize(testing_stream); + data_[t].Deserialize(testing_stream, false, true, testing_fd); unsigned int inner_width = data_[t].width() - (patchsize_x_ - 1); unsigned int inner_height = data_[t].height() - (patchsize_y_ - 1); @@ -134,7 +147,9 @@ TensorStreamPatchDataset::TensorStreamPatchDataset(std::istream& training_stream sample_count_testing_ += inner_width * inner_height; - labels_[t].Deserialize(testing_stream); + labels_[t].Deserialize(testing_stream, false, true, testing_fd); + + std::cout << "." << std::flush; } input_maps_ = data_[0].maps(); @@ -318,6 +333,9 @@ TensorStreamPatchDataset* TensorStreamPatchDataset::CreateFromConfiguration (std dataset_localized_error_function error_function = DefaultLocalizedErrorFunction; std::string training_file; std::string testing_file; + int training_fd = 0; + int testing_fd = 0; + bool no_mmap = false; file.clear(); file.seekg (0, std::ios::beg); @@ -325,6 +343,11 @@ TensorStreamPatchDataset* TensorStreamPatchDataset::CreateFromConfiguration (std while (! 
file.eof()) { std::string line; std::getline (file, line); + + if (StartsWithIdentifier (line, "nommap")) { + LOGDEBUG << "Dataset requested to not be memory mapped."; + no_mmap = true; + } if (StartsWithIdentifier (line, "classes")) { ParseCountIfPossible (line, "classes", classes); @@ -398,6 +421,13 @@ TensorStreamPatchDataset* TensorStreamPatchDataset::CreateFromConfiguration (std if(!training_stream->good()) { FATAL("Failed to load " << training_file << "!"); } +#ifdef BUILD_POSIX + if(!no_mmap) + training_fd = open(training_file.c_str(), O_RDONLY); + if(training_fd < 0) { + FATAL("Failed to load " << training_file << "!"); + } +#endif } else { training_stream = new std::istringstream(); } @@ -407,6 +437,13 @@ TensorStreamPatchDataset* TensorStreamPatchDataset::CreateFromConfiguration (std if(!testing_stream->good()) { FATAL("Failed to load " << testing_file << "!"); } +#ifdef BUILD_POSIX + if(!no_mmap) + testing_fd = open(testing_file.c_str(), O_RDONLY); + if(testing_fd < 0) { + FATAL("Failed to load " << testing_file << "!"); + } +#endif } else { testing_stream = new std::istringstream(); } @@ -418,7 +455,7 @@ TensorStreamPatchDataset* TensorStreamPatchDataset::CreateFromConfiguration (std return new TensorStreamPatchDataset (*training_stream, *testing_stream, classes, class_names, class_colors, class_weights, patchsize_x, - patchsize_y, error_function); + patchsize_y, error_function, training_fd, testing_fd); } }