Skip to content

Commit

Permalink
Merge pull request #32 from cvjena/master
Browse files Browse the repository at this point in the history
Pull changes for v1.3.1
  • Loading branch information
Clemens-Alexander Brust committed Sep 28, 2015
2 parents 44a77e4 + 17e6de0 commit 170d778
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 33 deletions.
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,11 @@ if(WIN32)
else()
if(APPLE)
add_definitions("-DBUILD_OSX")
add_definitions("-DBUILD_POSIX")
else()
if(UNIX)
add_definitions("-DBUILD_LINUX")
add_definitions("-DBUILD_POSIX")
else()
message(STATUS "Unsupported OS, good luck!")
endif()
Expand Down Expand Up @@ -129,7 +131,12 @@ if(CN24_BUILD_SQLITE3)
endif()

include_directories(${CN24_INC})
add_library(cn24 STATIC SHARED ${CN24_SOURCES} ${CN24_HEADERS})

# Select the library type by compiler. Both operands are quoted so the
# comparison stays well-formed even if CMAKE_CXX_COMPILER_ID is empty.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_library(cn24 STATIC ${CN24_SOURCES} ${CN24_HEADERS})
else()
add_library(cn24 STATIC SHARED ${CN24_SOURCES} ${CN24_HEADERS})
endif()

# And now for some dependencies
set(CN24_BUILD_PNG ON CACHE BOOL "Build CN24 with libpng support")
Expand Down
6 changes: 4 additions & 2 deletions include/cn24/util/Dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ class TensorStreamPatchDataset : public Dataset {
std::vector<datum> class_weights,
unsigned int patchsize_x,
unsigned int patchsize_y,
dataset_localized_error_function error_function = DefaultLocalizedErrorFunction);
dataset_localized_error_function error_function = DefaultLocalizedErrorFunction,
int training_fd = 0, int testing_fd = 0);

// Dataset implementations
virtual Task GetTask() const;
Expand Down Expand Up @@ -219,7 +220,8 @@ class TensorStreamDataset : public Dataset {
std::vector<std::string> class_names,
std::vector<unsigned int> class_colors,
std::vector<datum> class_weights,
dataset_localized_error_function error_function = DefaultLocalizedErrorFunction);
dataset_localized_error_function error_function = DefaultLocalizedErrorFunction,
int training_fd = 0, int testing_fd = 0);

// Dataset implementations
virtual Task GetTask() const;
Expand Down
14 changes: 12 additions & 2 deletions include/cn24/util/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ class Tensor {
* @brief Resizes the Tensor with data loss.
*/
void Resize (const std::size_t samples, const std::size_t width = 1,
const std::size_t height = 1, const std::size_t maps = 1);
const std::size_t height = 1, const std::size_t maps = 1,
datum* const preallocated_memory = nullptr, bool mmapped = false );

/**
* @brief Resizes the Tensor to match another Tensor's size.
Expand Down Expand Up @@ -151,8 +152,11 @@ class Tensor {
*
* Note that this resizes the Tensor if necessary and overwrites its content.
* @param input The input stream
* @param head_only Set to true to only read the dimensions
* @param try_mmap Set to true to attempt to memory map the file
* @param fd File descriptor for the same file that underlies the input stream (0 disables memory mapping)
*/
void Deserialize (std::istream& input);
void Deserialize (std::istream& input, bool head_only = false, bool try_mmap = false, int fd = 0);

/**
* @brief Loads a file and resizes the Tensor to match its contents
Expand Down Expand Up @@ -319,6 +323,12 @@ class Tensor {
*/
bool cl_gpu_ = false;

/**
* @brief If this is true, the Tensor was memory mapped
*/
bool mmapped_ = false;
void* original_mmap_ = nullptr;


bool hint_ignore_content_ = false;
};
Expand Down
1 change: 1 addition & 0 deletions src/net/DatasetInputLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ DatasetInputLayer::DatasetInputLayer (Dataset& dataset,

// Generate random permutation of the samples
// First, we need an array of ascending numbers
LOGDEBUG << "Generating random permutation..." << std::flush;
for (unsigned int i = 0; i < elements_training_; i++) {
perm_.push_back (i);
}
Expand Down
75 changes: 64 additions & 11 deletions src/util/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
#include <cmath>
#include <string>


#ifdef BUILD_POSIX
#include <sys/mman.h>
#include <errno.h>
#include <unistd.h>
#endif

#include "PNGUtil.h"
#include "JPGUtil.h"

Expand Down Expand Up @@ -55,6 +62,10 @@ Tensor::Tensor ( const Tensor& tensor, bool intentional ) {
}

Tensor::Tensor ( Tensor && tensor ) {
#ifdef BUILD_OPENCL
tensor.MoveToCPU();
#endif

data_ptr_ = tensor.data_ptr_;
samples_ = tensor.samples_;
maps_ = tensor.maps_;
Expand Down Expand Up @@ -118,7 +129,7 @@ void Tensor::Shadow ( Tensor& tensor ) {


void Tensor::Resize ( const std::size_t samples, const std::size_t width,
const std::size_t height, const std::size_t maps ) {
const std::size_t height, const std::size_t maps, datum* const preallocated_memory, bool mmapped) {
// Check if reshaping works
if ( Reshape ( samples, width, height, maps ) )
return;
Expand All @@ -133,12 +144,17 @@ void Tensor::Resize ( const std::size_t samples, const std::size_t width,
if ( elements == 0 )
return;

// Allocate
if(preallocated_memory != nullptr) {
data_ptr_ = preallocated_memory;
mmapped_ = mmapped;
} else {
// Allocate
#ifdef BLAS_MKL
data_ptr_ = ( datum* ) MKL_malloc ( elements * sizeof ( datum ) / sizeof ( char ), 32 );
data_ptr_ = ( datum* ) MKL_malloc ( elements * sizeof ( datum ) / sizeof ( char ), 32 );
#else
data_ptr_ = new datum[elements];
data_ptr_ = new datum[elements];
#endif
}

// Save configuration
samples_ = samples;
Expand Down Expand Up @@ -246,7 +262,7 @@ void Tensor::Serialize ( std::ostream& output, bool convert ) {
}
}

void Tensor::Deserialize ( std::istream& input ) {
void Tensor::Deserialize ( std::istream& input , bool head_only, bool try_mmap, int fd) {
#ifdef BUILD_OPENCL
MoveToCPU ( true );
#endif
Expand All @@ -263,11 +279,42 @@ void Tensor::Deserialize ( std::istream& input ) {
input.read ( ( char* ) &height, sizeof ( uint64_t ) / sizeof ( char ) );
input.read ( ( char* ) &maps, sizeof ( uint64_t ) / sizeof ( char ) );

Resize ( samples, width, height, maps );
#ifdef BUILD_POSIX
if(!try_mmap || fd == 0)
#endif
Resize ( samples, width, height, maps );

std::size_t elements = samples * maps * width * height;

if ( elements_ > 0 )
input.read ( ( char* ) data_ptr_, ( elements_ * sizeof ( datum ) )
/ sizeof ( char ) );
if ( elements > 0 && !head_only ) {
#ifdef BUILD_POSIX
if(try_mmap && fd != 0) {
// Get page size
long int page_size = sysconf(_SC_PAGESIZE);
long int current_position = input.tellg();
long int offset_in_page = current_position % page_size;
#ifdef BUILD_LINUX
void* target_mmap = mmap64(NULL,((elements* sizeof(datum)) / sizeof(char)) + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page);
#elif defined(BUILD_OSX)
// OS X is 64-bit by default
void* target_mmap = mmap(NULL,((elements* sizeof(datum)) / sizeof(char)) + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page);
#endif
if(target_mmap == MAP_FAILED) {
LOGERROR << "Memory map failed: " << errno;
}
original_mmap_ = target_mmap;

target_mmap = (void*)(((long)target_mmap) + offset_in_page);
Resize(samples, width, height, maps, (datum*)target_mmap, true);
input.seekg(( elements * sizeof ( datum ) ) / sizeof ( char ) , std::ios::cur);
} else
#endif
input.read ( ( char* ) data_ptr_, ( elements * sizeof ( datum ) )
/ sizeof ( char ) );
}
else if(head_only)
input.seekg(( elements * sizeof ( datum ) ) / sizeof ( char ) , std::ios::cur);

}


Expand Down Expand Up @@ -346,11 +393,17 @@ bool Tensor::CopyMap ( const Tensor& source, const std::size_t source_sample,
void Tensor::DeleteIfPossible() {
if ( data_ptr_ != nullptr ) {
if ( !is_shadow_ ) {
if(mmapped_) {
munmap((void*)original_mmap_, (elements_ * sizeof(datum)) / sizeof(char));
original_mmap_ = nullptr;
mmapped_ = false;
} else {
#ifdef BLAS_MKL
mkl_free ( data_ptr_ );
mkl_free ( data_ptr_ );
#else
delete[] data_ptr_;
delete[] data_ptr_;
#endif
}
#ifdef BUILD_OPENCL
if ( cl_data_ptr_ != 0 ) {
clReleaseMemObject ( (cl_mem)cl_data_ptr_ );
Expand Down
55 changes: 46 additions & 9 deletions src/util/TensorStreamDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
* For licensing information, see the LICENSE file included with this project.
*/

#ifdef BUILD_POSIX
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#endif

#include <fstream>
#include <cstdlib>

Expand All @@ -25,7 +31,8 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
std::vector< std::string > class_names,
std::vector<unsigned int> class_colors,
std::vector<datum> class_weights,
dataset_localized_error_function error_function) :
dataset_localized_error_function error_function,
int training_fd, int testing_fd ) :
classes_ (classes), class_names_ (class_names), class_colors_ (class_colors),
class_weights_(class_weights),
error_function_ (error_function) {
Expand All @@ -40,7 +47,7 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
Tensor tensor;

while (!training_stream.eof()) {
tensor.Deserialize (training_stream);
tensor.Deserialize (training_stream, true);

if (tensor.elements() == 0)
break;
Expand All @@ -59,7 +66,7 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
}

while (!testing_stream.eof()) {
tensor.Deserialize (testing_stream);
tensor.Deserialize (testing_stream, true);

if (tensor.elements() == 0)
break;
Expand Down Expand Up @@ -97,29 +104,37 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
unsigned int e = 0;
max_width_ = 0;
max_height_ = 0;

if((tensor_count_training_ + tensor_count_testing_) > 0) {
LOGINFO << "Deserializing " << (tensor_count_training_ + tensor_count_testing_) / 2 << " Tensors..." << std::endl << std::flush;
}

for (unsigned int t = 0; t < (tensor_count_training_ / 2); t++) {
data_[t].Deserialize (training_stream);
data_[t].Deserialize (training_stream, false, true, training_fd);

if (data_[t].width() > max_width_)
max_width_ = data_[t].width();

if (data_[t].height() > max_height_)
max_height_ = data_[t].height();

labels_[t].Deserialize (training_stream);
labels_[t].Deserialize (training_stream, false, true, training_fd);

std::cout << "." << std::flush;
}

for (unsigned int t = (tensor_count_training_ / 2) ; t < tensors_; t++) {
data_[t].Deserialize (testing_stream);
data_[t].Deserialize (testing_stream, false, true, testing_fd);

if (data_[t].width() > max_width_)
max_width_ = data_[t].width();

if (data_[t].height() > max_height_)
max_height_ = data_[t].height();

labels_[t].Deserialize (testing_stream);
labels_[t].Deserialize (testing_stream, false, true, testing_fd);

std::cout << "." << std::flush;
}

if (max_width_ & 1)
Expand Down Expand Up @@ -308,13 +323,21 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
dataset_localized_error_function error_function = DefaultLocalizedErrorFunction;
std::string training_file;
std::string testing_file;
int training_fd = 0;
int testing_fd = 0;
bool no_mmap = false;

file.clear();
file.seekg (0, std::ios::beg);

while (! file.eof()) {
std::string line;
std::getline (file, line);

if (StartsWithIdentifier (line, "nommap")) {
LOGDEBUG << "Dataset requested to not be memory mapped.";
no_mmap = true;
}

if (StartsWithIdentifier (line, "classes")) {
ParseCountIfPossible (line, "classes", classes);
Expand Down Expand Up @@ -388,6 +411,13 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
if(!training_stream->good()) {
FATAL("Failed to load " << training_file << "!");
}
#ifdef BUILD_POSIX
if(!no_mmap)
training_fd = open(training_file.c_str(), O_RDONLY);
if(training_fd < 0) {
FATAL("Failed to load " << training_file << "!");
}
#endif
} else {
training_stream = new std::istringstream();
}
Expand All @@ -397,6 +427,13 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
if(!testing_stream->good()) {
FATAL("Failed to load " << testing_file << "!");
}
#ifdef BUILD_POSIX
// Open a raw descriptor on the testing file so its tensors can be memory
// mapped. The descriptor stays 0 when "nommap" was requested.
// The testing file must be opened here, not the training file: the
// descriptor has to refer to the same file as testing_stream.
if(!no_mmap)
testing_fd = open(testing_file.c_str(), O_RDONLY);
if(testing_fd < 0) {
FATAL("Failed to load " << testing_file << "!");
}
#endif
} else {
testing_stream = new std::istringstream();
}
Expand All @@ -405,9 +442,9 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
for (unsigned int c = 0; c < classes; c++)
class_weights.push_back(1.0);
}

return new TensorStreamDataset (*training_stream, *testing_stream, classes,
class_names, class_colors, class_weights, error_function);
class_names, class_colors, class_weights, error_function, training_fd, testing_fd);
}

}
Loading

0 comments on commit 170d778

Please sign in to comment.