diff --git a/.travis.yml b/.travis.yml index bb3b552..527260a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ install: - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then mkdir -p cl12/CL; fi; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then cd cl12/CL; fi; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl.h; fi; fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp; fi; fi +# - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp; fi; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h; fi; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h; fi; fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then if [ "$TEST_SUITE" = "opencl" ]; then wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h; fi; fi @@ -49,7 +49,10 @@ before_script: - if [ "$TEST_SUITE" = "reference" ]; then cmake -DCMAKE_BUILD_TYPE=Release ..; fi - if [ "$TEST_SUITE" = "opencl" ]; then cmake -DCMAKE_BUILD_TYPE=Release -DCN24_BUILD_OPENCL:BOOL=ON -DCN24_BUILD_OPENCL_CLBLAS:BOOL=ON ..; fi -script: make +script: + - make +# OpenCL on Mac OS X supports a maximum work group size of (1,1,1) on CPUs, so we only do a sanity check in that case + - if [ "$TEST_SUITE" == "opencl" ] && [ "$TRAVIS_OS_NAME" == "osx" ]; then ./testOpenCL; else ./runBenchmark --ci;fi os: - linux @@ -62,3 +65,8 @@ compiler: env: - TEST_SUITE=opencl - TEST_SUITE=reference + +matrix: + exclude: + - os: linux + env: TEST_SUITE=opencl diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b2bb95..23c8aa9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,9 @@ cmake_minimum_required(VERSION 2.8) project(CN24 C CXX) -set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type: Select either Debug, RelWithDebInfo or Release" FORCE) +endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") @@ -197,6 +199,7 @@ if(CN24_BUILD_ACCELERATE) message(STATUS "Using Accelerate include directory: ${ACCELERATE_INCLUDE_DIR}") include_directories(${ACCELERATE_INCLUDE_DIR}) set(CN24_LIBS ${CN24_LIBS} ${ACCELERATE_BLAS}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flax-vector-conversions") add_definitions("-DBUILD_BLAS") add_definitions("-DBLAS_ACCELERATE") endif() @@ -204,9 +207,9 @@ endif() set(CN24_BUILD_OPENCL OFF CACHE BOOL "Build CN24 with OpenCL support") if(CN24_BUILD_OPENCL) find_library(OPENCL_ICDL NAMES OpenCL libOpenCL PATHS $ENV{CUDA_PATH}/lib/x64 - $ENV{INTELOCLSDKROOT}/lib/x64) + $ENV{INTELOCLSDKROOT}/lib/x64 $ENV{AMDAPPSDKROOT}/lib/x86_64) find_path(OPENCL_INCLUDE_DIR CL/cl.h cl.h PATHS $ENV{CUDA_PATH}/include - $ENV{INTELOCLSDKROOT}/include) + $ENV{INTELOCLSDKROOT}/include $ENV{AMDAPPSDKROOT}/include) message(STATUS "Using OpenCL library: ${OPENCL_ICDL}") message(STATUS "Using OpenCL include directory: ${OPENCL_INCLUDE_DIR}") include_directories(${OPENCL_INCLUDE_DIR}) @@ -219,8 +222,8 @@ endif() set(CN24_BUILD_OPENCL_CLBLAS OFF CACHE BOOL "Build CN24 with OpenCL/clBLAS") if(CN24_BUILD_OPENCL_CLBLAS) if(CN24_BUILD_OPENCL) - 
find_library(CLBLAS_LIBRARY clBLAS libclBLAS) - find_path(CLBLAS_INCLUDE_DIR clBLAS.h) + find_library(CLBLAS_LIBRARY clBLAS libclBLAS PATHS $ENV{AMDAPPSDKROOT}/lib64/import) + find_path(CLBLAS_INCLUDE_DIR clBLAS.h PATHS $ENV{AMDAPPSDKROOT}/include) message(STATUS "Using OpenCL/clBLAS library: ${CLBLAS_LIBRARY}") message(STATUS "Using OpenCL/clBLAS include directory: ${CLBLAS_INCLUDE_DIR}") include_directories(${CLBLAS_INCLUDE_DIR}) diff --git a/include/cn24.h b/include/cn24.h index 65403dc..a4ed5f1 100644 --- a/include/cn24.h +++ b/include/cn24.h @@ -17,14 +17,22 @@ #include "cn24/util/Config.h" #include "cn24/util/Dataset.h" #include "cn24/util/Tensor.h" +#include "cn24/util/CompressedTensor.h" #include "cn24/util/TensorViewer.h" #include "cn24/util/CombinedTensor.h" +#include "cn24/util/TensorStream.h" +#include "cn24/util/CompressedTensorStream.h" +#include "cn24/util/FloatTensorStream.h" #include "cn24/util/PNGUtil.h" #include "cn24/util/JPGUtil.h" #include "cn24/util/Log.h" #include "cn24/util/KITTIData.h" #include "cn24/util/Init.h" #include "cn24/util/GradientTester.h" +#include "cn24/util/StatAggregator.h" +#include "cn24/util/StatSink.h" +#include "cn24/util/ConsoleStatSink.h" +#include "cn24/util/CSVStatSink.h" #include "cn24/math/TensorMath.h" @@ -38,6 +46,7 @@ #include "cn24/net/ConvolutionLayer.h" #include "cn24/net/MaxPoolingLayer.h" #include "cn24/net/AdvancedMaxPoolingLayer.h" +#include "cn24/net/InputDownSamplingLayer.h" #include "cn24/net/LocalResponseNormalizationLayer.h" #include "cn24/net/UpscaleLayer.h" #include "cn24/net/LossFunctionLayer.h" @@ -48,11 +57,13 @@ #include "cn24/net/SpatialPriorLayer.h" #include "cn24/net/ConcatenationLayer.h" #include "cn24/net/GradientAccumulationLayer.h" +#include "cn24/net/SumLayer.h" #include "cn24/net/Net.h" #include "cn24/net/Trainer.h" #include "cn24/net/NetGraph.h" #include "cn24/net/NetStatus.h" #include "cn24/factory/ConfigurableFactory.h" +#include "cn24/factory/SkipLayerNetworkFactory.h" #endif diff --git a/include/cn24/factory/ConfigurableFactory.h b/include/cn24/factory/ConfigurableFactory.h index aed2b25..2f172ca 100644 --- a/include/cn24/factory/ConfigurableFactory.h +++ b/include/cn24/factory/ConfigurableFactory.h @@ -24,8 +24,20 @@ #include "../util/Log.h" namespace Conv { + +class Factory { +public: + virtual int AddLayers(Net& net, Connection data_layer_connection, const unsigned int output_classes, bool add_loss_layer = false, std::ostream& graph_output = std::cout) = 0; + virtual bool AddLayers(NetGraph& graph, NetGraphConnection data_layer_connection, const unsigned int output_classes, bool add_loss_layer = false) = 0; + virtual int patchsizex() = 0; + virtual int patchsizey() = 0; + virtual Layer* CreateLossLayer(const unsigned int output_classes, const datum loss_weight = 1.0) = 0; + virtual void InitOptimalSettings() = 0; + virtual TrainerSettings optimal_settings() const = 0; + virtual Method method() const = 0; +}; -class ConfigurableFactory { +class ConfigurableFactory : public Factory { public: /** * @brief Builds a ConfigurableFactory using an input stream and a random seed diff --git a/include/cn24/factory/SkipLayerNetworkFactory.h b/include/cn24/factory/SkipLayerNetworkFactory.h new file mode 100644 index 0000000..8b97f8b --- /dev/null +++ b/include/cn24/factory/SkipLayerNetworkFactory.h @@ -0,0 +1,28 @@ +#ifndef CONV_SKIPLAYERNETWORKFACTORY_H +#define CONV_SKIPLAYERNETWORKFACTORY_H + +#include + +#include "../net/Net.h" +#include "../net/NetGraph.h" +#include "../net/Trainer.h" +#include 
"../util/Dataset.h" +#include "../util/Log.h" +#include "ConfigurableFactory.h" + +namespace Conv { + +class SkipLayerNetworkFactory : public Factory { + int AddLayers(Net& net, Connection data_layer_connection, const unsigned int output_classes, bool add_loss_layer = false, std::ostream& graph_output = std::cout); + bool AddLayers(NetGraph& graph, NetGraphConnection data_layer_connection, const unsigned int output_classes, bool add_loss_layer = false); + int patchsizex(); + int patchsizey(); + Layer* CreateLossLayer(const unsigned int output_classes, const datum loss_weight = 1.0); + void InitOptimalSettings(); + TrainerSettings optimal_settings() const; + Method method() const; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/math/TensorMath.h b/include/cn24/math/TensorMath.h index d54283b..4a77d58 100644 --- a/include/cn24/math/TensorMath.h +++ b/include/cn24/math/TensorMath.h @@ -100,6 +100,25 @@ class TensorMath { static void SMS2( const Tensor& source, Tensor& target); + + static void DOWN( + const Tensor& source, + Tensor& target, + const int region_width, + const int region_height, + const datum target_factor); + + static void UP( + const Tensor& source, + Tensor& target, + const int region_width, + const int region_height, + const datum target_factor); + + static void ADD( + const Tensor& source_a, + const Tensor& source_b, + Tensor& target); }; } diff --git a/include/cn24/net/BinaryStatLayer.h b/include/cn24/net/BinaryStatLayer.h index 7d60373..1343acc 100644 --- a/include/cn24/net/BinaryStatLayer.h +++ b/include/cn24/net/BinaryStatLayer.h @@ -19,6 +19,7 @@ #include "Layer.h" #include "StatLayer.h" +#include "../util/StatAggregator.h" namespace Conv { @@ -33,6 +34,8 @@ class BinaryStatLayer: public Layer, public StatLayer { */ BinaryStatLayer(unsigned int thresholds = 24, const datum min_t = -0.458333, const datum max_t = 0.5); + + void UpdateAll(); /** * @brief Prints the current statistics @@ -80,6 +83,13 @@ class BinaryStatLayer: public Layer, public StatLayer { datum* false_negatives_ = nullptr; bool disabled_ = false; + + StatDescriptor* stat_fpr_ = nullptr; + StatDescriptor* stat_fnr_ = nullptr; + StatDescriptor* stat_pre_ = nullptr; + StatDescriptor* stat_rec_ = nullptr; + StatDescriptor* stat_acc_ = nullptr; + StatDescriptor* stat_f1_ = nullptr; }; } diff --git a/include/cn24/net/ConfusionMatrixLayer.h b/include/cn24/net/ConfusionMatrixLayer.h index f5ca02b..143ffb9 100644 --- a/include/cn24/net/ConfusionMatrixLayer.h +++ b/include/cn24/net/ConfusionMatrixLayer.h @@ -19,7 +19,9 @@ #include #include "Layer.h" -#include "StatLayer.h" +#include "StatLayer.h" + +#include "../util/StatAggregator.h" namespace Conv { @@ -34,10 +36,11 @@ class ConfusionMatrixLayer: public Layer, public StatLayer { explicit ConfusionMatrixLayer(std::vector names, const unsigned int classes); + void UpdateAll(); /** * @brief Prints the current statistics * - * @param prefix This is printed before every line ouf output + * @param prefix This is printed before every line of output * @param training Whether the net is currently training. 
Affects output color */ void Print (std::string prefix, bool training); @@ -80,6 +83,11 @@ class ConfusionMatrixLayer: public Layer, public StatLayer { long double total_ = 0; long double right_ = 0; long double* per_class_ = nullptr; + + StatDescriptor* stat_orr_ = nullptr; + StatDescriptor* stat_arr_ = nullptr; + StatDescriptor* stat_iou_ = nullptr; + }; } diff --git a/include/cn24/net/InputDownSamplingLayer.h b/include/cn24/net/InputDownSamplingLayer.h new file mode 100644 index 0000000..1a81b61 --- /dev/null +++ b/include/cn24/net/InputDownSamplingLayer.h @@ -0,0 +1,70 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +/** + * @file InputDownSamplingLayer.h + * @class InputDownSamplingLayer + * @brief Layer that scales input down + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_INPUTDOWNSAMPLINGLAYER_H +#define CONV_INPUTDOWNSAMPLINGLAYER_H + +#include <sstream> +#include <string> + +#include "SimpleLayer.h" + + +namespace Conv { + +class InputDownSamplingLayer : public SimpleLayer { +public: + /** + * @brief Constructs an InputDownSamplingLayer. + * + * @param region_width Width of the pooling regions + * @param region_height Height of the pooling regions + */ + InputDownSamplingLayer(const unsigned int region_width, + const unsigned int region_height); + + // Implementations for SimpleLayer + bool CreateOutputs (const std::vector< CombinedTensor* >& inputs, std::vector< CombinedTensor* >& outputs); + bool Connect (const CombinedTensor* input, CombinedTensor* output); + void FeedForward(); + void BackPropagate(); + + inline unsigned int Gain() { + return gain / (region_width_ * region_height_); + } + + inline std::string GetLayerDescription() { + std::ostringstream ss; + ss << "Input Down-Sampling Layer (" << region_width_ << "x" << region_height_ << ")"; + return ss.str(); + } + + bool IsOpenCLAware(); +private: + // Settings + unsigned int region_width_ = 0; + unsigned int region_height_ = 0; + + // Feature map dimensions + unsigned int input_width_ = 0; + unsigned int input_height_ = 0; + unsigned int output_width_ = 0; + unsigned int output_height_ = 0; + + unsigned int maps_ = 0; +}; + +} + +#endif diff --git a/include/cn24/net/NetGraph.h b/include/cn24/net/NetGraph.h index 2854d38..4ea855e 100644 --- a/include/cn24/net/NetGraph.h +++ b/include/cn24/net/NetGraph.h @@ -20,6 +20,8 @@ #include "NetStatus.h" #include "../util/TensorViewer.h" +#include "StatLayer.h" + #include namespace Conv { @@ -106,6 +108,12 @@ class NetGraph : public NetStatus { // Output void PrintGraph(std::ostream& graph_output); void SetLayerViewEnabled(bool enabled) { layerview_enabled_ = enabled; } + void SetStatLayersEnabled(bool enabled) { + for (unsigned int n = 0; n < GetStatNodes().size(); n++) { + StatLayer* stat_layer = dynamic_cast<StatLayer*>(GetStatNodes()[n]->layer); + stat_layer->SetDisabled(!enabled); + } + } datum AggregateLoss(); // Status diff --git a/include/cn24/net/NetStatus.h b/include/cn24/net/NetStatus.h index e77edfa..7979921 100644 --- a/include/cn24/net/NetStatus.h +++ b/include/cn24/net/NetStatus.h @@ -15,6 +15,9 @@ #ifndef CONV_NETSTATUS_H #define CONV_NETSTATUS_H +#include "../util/Init.h" +#include "../util/StatAggregator.h" + namespace Conv { class NetStatus{ @@ -29,7 +32,10 @@ class NetStatus{ * * @param is_testing The new testing status */ - inline void SetIsTesting(bool
is_testing) { is_testing_ = is_testing; } + inline void SetIsTesting(bool is_testing) { + is_testing_ = is_testing; + System::stat_aggregator->hardcoded_stats_.is_training = !is_testing; + } private: bool is_testing_ = false; }; diff --git a/include/cn24/net/StatLayer.h b/include/cn24/net/StatLayer.h index 783b2df..23a89f1 100644 --- a/include/cn24/net/StatLayer.h +++ b/include/cn24/net/StatLayer.h @@ -23,6 +23,7 @@ namespace Conv { class StatLayer { public: + virtual void UpdateAll() = 0; virtual void Print(std::string prefix, bool training) = 0; virtual void Reset() = 0; virtual void SetDisabled(bool disabled) = 0; diff --git a/include/cn24/net/SumLayer.h b/include/cn24/net/SumLayer.h new file mode 100644 index 0000000..6ab4f13 --- /dev/null +++ b/include/cn24/net/SumLayer.h @@ -0,0 +1,47 @@ +/** + * @file SumLayer.h + * @class SumLayer + * @brief Adds the two inputs element-wise. + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_SUMLAYER_H +#define CONV_SUMLAYER_H + +#include <vector> + +#include "Layer.h" + +namespace Conv { + +class SumLayer: public Layer { +public: + SumLayer(); + + // Layer implementations + bool CreateOutputs (const std::vector< CombinedTensor* >& inputs, + std::vector< CombinedTensor* >& outputs); + bool Connect (const std::vector< CombinedTensor* >& inputs, + const std::vector< CombinedTensor* >& outputs, + const NetStatus* status ); + void FeedForward(); + void BackPropagate(); + + std::string GetLayerDescription() { return "Sum Layer"; } + void CreateBufferDescriptors(std::vector< NetGraphBuffer >& buffers) { + NetGraphBuffer buffer; + buffer.description = "Output"; + buffers.push_back(buffer); + }; +private: + CombinedTensor* input_a_ = nullptr; + CombinedTensor* input_b_ = nullptr; + CombinedTensor* output_ = nullptr; + + unsigned int maps_ = 0; + unsigned int samples_ = 0; +}; + +} +#endif diff --git a/include/cn24/net/Trainer.h b/include/cn24/net/Trainer.h index 99ec184..c54ce0a 100644 --- a/include/cn24/net/Trainer.h +++ b/include/cn24/net/Trainer.h @@ -18,6 +18,7 @@ #include #include "../util/CombinedTensor.h" +#include "../util/StatAggregator.h" #include "TrainingLayer.h" #include "NetGraph.h" @@ -41,6 +42,7 @@ struct TrainerSettings { datum mu = 1.75; datum eta = 1.5; OPTIMIZATION_METHOD optimization_method = GRADIENT_DESCENT; + bool stats_during_training = true; unsigned int pbatchsize = 1; unsigned int sbatchsize = 1; unsigned int iterations = 500; @@ -61,7 +63,7 @@ class Trainer { * * @param epochs The number of epochs to train */ - void Train (unsigned int epochs); + void Train (unsigned int epochs, bool do_snapshot); /** * @brief Test the net by running every test sample through the net @@ -101,9 +103,13 @@ class Trainer { * (datum) iteration, -settings_.exponent); } + + inline void SetStatsDuringTraining(bool enable) { settings_.stats_during_training = enable; } private: void ApplyGradients (datum lr); + void InitializeStats(); + // References for easy access NetGraph& graph_; std::vector parameters_; @@ -116,12 +122,23 @@ class Trainer { // Sample count unsigned int sample_count_ = 0; + unsigned int weight_count_ = 0; // Learning options TrainerSettings settings_; // State unsigned int epoch_ = 0; + + // Global state + static bool stats_are_initialized_; + static StatDescriptor* stat_aggloss_; + static StatDescriptor* stat_qp_caseA_; + static StatDescriptor* stat_qp_caseB_; + static StatDescriptor* stat_qp_caseC_; + static StatDescriptor* stat_qp_caseM_; + static StatDescriptor*
stat_fps_; + static StatDescriptor* stat_sps_; }; diff --git a/include/cn24/net/UpscaleLayer.h b/include/cn24/net/UpscaleLayer.h index 6f834c7..a8d6a55 100644 --- a/include/cn24/net/UpscaleLayer.h +++ b/include/cn24/net/UpscaleLayer.h @@ -41,6 +41,8 @@ class UpscaleLayer : public SimpleLayer { void FeedForward(); void BackPropagate(); + bool IsOpenCLAware() { return true; } + inline std::string GetLayerDescription() { std::ostringstream ss; ss << "Upscale Layer (" << region_width_ << "x" << region_height_ << ")"; diff --git a/include/cn24/util/CSVStatSink.h b/include/cn24/util/CSVStatSink.h new file mode 100644 index 0000000..f377e0c --- /dev/null +++ b/include/cn24/util/CSVStatSink.h @@ -0,0 +1,44 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +/** + * @file CSVStatSink.h + * @brief Gets data from StatAggregator and processes it into a CSV file + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_CSVSTATSINK_H +#define CONV_CSVSTATSINK_H + +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#include "Config.h" +#include "Log.h" + +#include "StatAggregator.h" +#include "StatSink.h" + +namespace Conv +{ +class CSVStatSink : public StatSink { +public: + ~CSVStatSink() { if(csv_stream_ != nullptr) {csv_stream_->close(); delete csv_stream_; }} + virtual void Initialize(std::vector<StatDescriptor*>& stat_descriptors); + virtual void Process(HardcodedStats& hardcoded_stats, std::vector<Stat*>& stats); + virtual void SetCurrentExperiment(std::string current_experiment); + +private: + std::vector<StatDescriptor*> stat_descriptors_; + std::ofstream* csv_stream_ = nullptr; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/CompressedTensor.h b/include/cn24/util/CompressedTensor.h new file mode 100644 index 0000000..45ff78b --- /dev/null +++ b/include/cn24/util/CompressedTensor.h @@ -0,0 +1,130 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#ifndef CONV_COMPRESSEDTENSOR_H +#define CONV_COMPRESSEDTENSOR_H + +#include +#include +#include + +#include "Log.h" +#include "Config.h" + +#include "Tensor.h" + +namespace Conv { + +class CompressedTensor; +/** + * @brief Prints size to the ostream, may be helpful. + */ +std::ostream& operator<< (std::ostream& output, const CompressedTensor& tensor); + +class CompressedTensor { +public: + /** + * @brief Constructs an empty CompressedTensor of zero size. + */ + CompressedTensor (); + + ~CompressedTensor (); + + /* + * Compression and decompression encapsulated + */ + void Compress(Tensor& tensor); + void Decompress(Tensor& tensor, datum* preallocated_memory = nullptr); + + + /** + * @brief Serializes the CompressedTensor to the stream. + * + * @param output The output stream + */ + void Serialize (std::ostream& output); + + /** + * @brief Deserializes from the stream. + * + * Note that this resizes the CompressedTensor if necessary and overwrites its content.
+ * @param input The input stream + * @param head_only Set to true to only read the dimensions + * @param try_mmap Set to true to attempt to memory map the file + * @param fd File descriptor for the SAME file as input's underlying + */ + void Deserialize (std::istream& input, bool head_only = false, bool try_mmap = false, int fd = 0); + + /** + * @brief Writes some tensor statistics to the debug output + */ + void PrintStats(); + + /** + * @brief Deallocates the memory if data_ptr is not a nullptr. + */ + void DeleteIfPossible(); + + // Accessors for the size information + inline std::size_t samples() const { + return samples_; + } + inline std::size_t maps() const { + return maps_; + } + inline std::size_t height() const { + return height_; + } + inline std::size_t width() const { + return width_; + } + inline std::size_t elements() const { + return elements_; + } + inline std::size_t compressed_length() const { + return compressed_length_; + } + +private: + /** + * @brief Resizes the CompressedTensor with data loss. + */ + void Resize (const std::size_t samples, const std::size_t width, + const std::size_t height, const std::size_t maps, + const std::size_t compressed_length, + char* const preallocated_memory = nullptr, bool mmapped = false ); + + // Pointer to the actual data + char* compressed_data_ptr_ = nullptr; + + // Sizes + std::size_t samples_ = 0; + std::size_t maps_ = 0; + std::size_t height_ = 0; + std::size_t width_ = 0; + std::size_t elements_ = 0; + + std::size_t compressed_length_ = 0; + + static void CompressData(void* uncompressed, const std::size_t& uncompressed_elements, void* compressed, std::size_t& compressed_length); + static void DecompressData(void* uncompressed, std::size_t& uncompressed_elements, void* compressed, const std::size_t& compressed_length); + +public: + + /** + * @brief If this is true, the CompressedTensor was memory mapped + */ + bool mmapped_ = false; + void* original_mmap_ = nullptr; +}; + + + +} + +#endif diff --git a/include/cn24/util/CompressedTensorStream.h b/include/cn24/util/CompressedTensorStream.h new file mode 100644 index 0000000..a8f67a4 --- /dev/null +++ b/include/cn24/util/CompressedTensorStream.h @@ -0,0 +1,52 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#ifndef CONV_COMPRESSEDTENSORSTREAM_H +#define CONV_COMPRESSEDTENSORSTREAM_H + +#include +#include +#include + +#include "Log.h" +#include "Config.h" + +#include "Tensor.h" +#include "CompressedTensor.h" + +#include "TensorStream.h" + +#define CN24_CTS_MAGIC 0xC24CC24CC24CC24C + +namespace Conv { + +class CompressedTensorStream : public TensorStream { +public: + + ~CompressedTensorStream() { + for(CompressedTensor* tensor: tensors_) { + delete tensor; + } + } + + // TensorStream implementations + std::size_t GetWidth(unsigned int index) { return index < tensors_.size() ? tensors_[index]->width() : 0; } + std::size_t GetHeight(unsigned int index) { return index < tensors_.size() ? tensors_[index]->height() : 0; } + std::size_t GetMaps(unsigned int index) { return index < tensors_.size() ? tensors_[index]->maps() : 0; } + std::size_t GetSamples(unsigned int index) { return index < tensors_.size() ? 
tensors_[index]->samples() : 0; } + unsigned int GetTensorCount() { return tensors_.size(); } + unsigned int LoadFile(std::string path); + bool CopySample(const unsigned int source_index, const std::size_t source_sample, Tensor& target, const std::size_t target_sample); +private: + std::vector<CompressedTensor*> tensors_; + std::size_t max_elements_ = 0; + Tensor temp_tensor_; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/ConsoleStatSink.h b/include/cn24/util/ConsoleStatSink.h new file mode 100644 index 0000000..a315d4d --- /dev/null +++ b/include/cn24/util/ConsoleStatSink.h @@ -0,0 +1,57 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +/** + * @file ConsoleStatSink.h + * @brief Gets data from StatAggregator and processes it + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_CONSOLESTATSINK_H +#define CONV_CONSOLESTATSINK_H + +#include <iomanip> +#include <string> +#include <vector> + +#include "Config.h" +#include "Log.h" + +#include "StatAggregator.h" +#include "StatSink.h" + +namespace Conv +{ +class ConsoleStatSink : public StatSink { +public: + virtual void Initialize(std::vector<StatDescriptor*>& stat_descriptors) { + stat_descriptors_ = stat_descriptors; + LOGDEBUG << "Initializing ConsoleStatSink. Registered Stats:"; + for(unsigned int s = 0; s < stat_descriptors_.size(); s++) { + LOGDEBUG << " - " << stat_descriptors_[s]->description; + } + } + virtual void Process(HardcodedStats& hardcoded_stats, std::vector<Stat*>& stats) { + (hardcoded_stats.is_training ? LOGTRESULT : LOGRESULT) << "Stats for epoch " << hardcoded_stats.epoch << ":" << LOGRESULTEND; + for(unsigned int s = 0; s < stat_descriptors_.size(); s++) { + if(!stats[s]->is_null) { + (hardcoded_stats.is_training ?
LOGTRESULT : LOGRESULT) << std::setw(32) << stat_descriptors_[s]->description << ": " << std::setw(24) << stats[s]->value << " " << stat_descriptors_[s]->unit << LOGRESULTEND; + } + } + } + virtual void SetCurrentExperiment(std::string current_experiment) { + LOGINFO << "Beginning Experiment: " << current_experiment; + } + +private: + std::vector<StatDescriptor*> stat_descriptors_; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/Dataset.h b/include/cn24/util/Dataset.h index 539c974..22754f0 100644 --- a/include/cn24/util/Dataset.h +++ b/include/cn24/util/Dataset.h @@ -18,6 +18,7 @@ #include "Config.h" #include "Tensor.h" +#include "TensorStream.h" namespace Conv { @@ -214,8 +215,10 @@ class TensorStreamPatchDataset : public Dataset { class TensorStreamDataset : public Dataset { public: - TensorStreamDataset(std::istream& training_stream, - std::istream& testing_stream, + TensorStreamDataset(/*std::istream& training_stream, + std::istream& testing_stream,*/ + TensorStream* training_stream, + TensorStream* testing_stream, unsigned int classes, std::vector<std::string> class_names, std::vector<unsigned int> class_colors, @@ -244,8 +247,12 @@ class TensorStreamDataset : public Dataset { private: // Stored data + /* Tensor* data_ = nullptr; Tensor* labels_ = nullptr; + */ + TensorStream* training_stream_; + TensorStream* testing_stream_; Tensor error_cache; diff --git a/include/cn24/util/FloatTensorStream.h b/include/cn24/util/FloatTensorStream.h new file mode 100644 index 0000000..b66ced2 --- /dev/null +++ b/include/cn24/util/FloatTensorStream.h @@ -0,0 +1,46 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#ifndef CONV_FLOATTENSORSTREAM_H +#define CONV_FLOATTENSORSTREAM_H + +#include +#include +#include + +#include "Log.h" +#include "Config.h" + +#include "Tensor.h" +#include "TensorStream.h" + +namespace Conv { + +class FloatTensorStream : public TensorStream { +public: + + ~FloatTensorStream() { + for(Tensor* tensor: tensors_) { + delete tensor; + } + } + + // TensorStream implementations + std::size_t GetWidth(unsigned int index) { return index < tensors_.size() ? tensors_[index]->width() : 0; } + std::size_t GetHeight(unsigned int index) { return index < tensors_.size() ? tensors_[index]->height() : 0; } + std::size_t GetMaps(unsigned int index) { return index < tensors_.size() ? tensors_[index]->maps() : 0; } + std::size_t GetSamples(unsigned int index) { return index < tensors_.size() ? tensors_[index]->samples() : 0; } + unsigned int GetTensorCount() { return tensors_.size(); } + unsigned int LoadFile(std::string path); + bool CopySample(const unsigned int source_index, const std::size_t source_sample, Tensor& target, const std::size_t target_sample); +private: + std::vector<Tensor*> tensors_; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/GradientTester.h b/include/cn24/util/GradientTester.h index 5bd20b3..aed9529 100644 --- a/include/cn24/util/GradientTester.h +++ b/include/cn24/util/GradientTester.h @@ -22,9 +22,9 @@ class GradientTester { /** * @brief Tests the gradients computed by the net numerically * - * Only call this function on nets with a constant input! + * Only call this function on nets with a constant input, not a DatasetInputLayer!
*/ - static void TestGradient(NetGraph& net); + static void TestGradient(NetGraph& net, unsigned int skip_weights = 0, bool fatal_fail = false); }; diff --git a/include/cn24/util/Init.h b/include/cn24/util/Init.h index 45a4b19..3bd8b26 100644 --- a/include/cn24/util/Init.h +++ b/include/cn24/util/Init.h @@ -6,7 +6,8 @@ */ /** * @file Init.h - * @brief Provides initialization functions for several subsystems + * @brief Provides initialization functions for several subsystems and a + * singleton "System" class for global objects. * * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) */ @@ -18,11 +19,13 @@ namespace Conv { class TensorViewer; +class StatAggregator; class System { public: static void Init(int requested_log_level = -1); static void GetExecutablePath(std::string& binary_path); static TensorViewer* viewer; + static StatAggregator* stat_aggregator; static int log_level; }; } diff --git a/include/cn24/util/StatAggregator.h b/include/cn24/util/StatAggregator.h new file mode 100644 index 0000000..765dab9 --- /dev/null +++ b/include/cn24/util/StatAggregator.h @@ -0,0 +1,107 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +/** + * @file StatAggregator.h + * @brief Collects data from various sources and aggregates them into a statistic + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_STATAGGREGATOR_H +#define CONV_STATAGGREGATOR_H + +#include <chrono> +#include <climits> +#include <functional> +#include <string> +#include <vector> + +#include "Config.h" + +namespace Conv +{ +// Forward declarations +class StatSink; +class Trainer; +class NetStatus; + +// Hardcoded stats +struct HardcodedStats { + double seconds_elapsed = 0.0; + unsigned long iterations = 0UL; + unsigned long weights = 0UL; + unsigned long epoch = 0UL; + bool is_training = false; + std::string current_experiment = "unnamed"; + + void Reset() { + seconds_elapsed = 0.0; + iterations = 0UL; + weights = 0UL; + } +}; + +struct Stat { + double value = 0.0; + bool is_null = false; +}; + +struct StatDescriptor { + bool nullable = false; + std::string description = ""; + std::string unit = ""; + + // Lambdas for processing + std::function<void(Stat&)> init_function = [] (Stat& stat) {}; + std::function<void(Stat&, double)> update_function = [] (Stat& stat, double user_value) {}; + std::function<Stat(HardcodedStats&, Stat&)> output_function = + [] (HardcodedStats& hc_stats, Stat& stat) -> Stat {return stat;}; + + // For easy access + unsigned int stat_id = UINT_MAX; +}; + +class StatAggregator { + friend class Trainer; + friend class NetStatus; +public: + unsigned int RegisterStat(StatDescriptor* stat_descriptor); + unsigned int RegisterSink(StatSink* stat_sink); + void Initialize(); + + void Update(unsigned int stat_id, double user_value); + void Generate(); + + void StartRecording(); + void StopRecording(); + void Reset(); + + void Snapshot(); + + void SetCurrentExperiment(std::string current_experiment); +private: + // State + enum StatAggregatorState { + STOPPED, RECORDING, INIT } state_ = INIT; + std::chrono::time_point<std::chrono::system_clock> start_time_; + + // Stats + HardcodedStats hardcoded_stats_; + std::vector<Stat*> stats_; + + // Descriptors + std::vector<StatDescriptor*> stat_descriptors_; + unsigned int stat_descriptor_count_ = 0; + + // Sinks + std::vector<StatSink*> stat_sinks_; + unsigned int stat_sink_count_ = 0; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/StatSink.h b/include/cn24/util/StatSink.h
new file mode 100644 index 0000000..80c5031 --- /dev/null +++ b/include/cn24/util/StatSink.h @@ -0,0 +1,36 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +/** + * @file StatSink.h + * @brief Gets data from StatAggregator and processes it + * + * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com) + */ + +#ifndef CONV_STATSINK_H +#define CONV_STATSINK_H + +#include <string> +#include <vector> + +#include "Config.h" + +#include "StatAggregator.h" + +namespace Conv +{ +class StatSink { +public: + virtual void Initialize(std::vector<StatDescriptor*>& stat_descriptors) = 0; + virtual void SetCurrentExperiment(std::string current_experiment) = 0; + virtual void Process(HardcodedStats& hardcoded_stats, std::vector<Stat*>& stats) = 0; +}; + +} + +#endif \ No newline at end of file diff --git a/include/cn24/util/Tensor.h b/include/cn24/util/Tensor.h index 189098c..7e1797b 100644 --- a/include/cn24/util/Tensor.h +++ b/include/cn24/util/Tensor.h @@ -104,7 +104,8 @@ class Tensor { */ void Resize (const std::size_t samples, const std::size_t width = 1, const std::size_t height = 1, const std::size_t maps = 1, - datum* const preallocated_memory = nullptr, bool mmapped = false ); + datum* const preallocated_memory = nullptr, bool mmapped = false, + bool dont_delete = false); /** * @brief Resizes the Tensor to match another Tensor's size. diff --git a/include/cn24/util/TensorStream.h b/include/cn24/util/TensorStream.h new file mode 100644 index 0000000..4b1d69a --- /dev/null +++ b/include/cn24/util/TensorStream.h @@ -0,0 +1,62 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#ifndef CONV_TENSORSTREAM_H +#define CONV_TENSORSTREAM_H + +#include +#include +#include +#include + +#include "Log.h" +#include "Config.h" + +#include "Tensor.h" + +namespace Conv { + +class TensorStream { +public: + virtual std::size_t GetWidth(unsigned int index) = 0; + virtual std::size_t GetHeight(unsigned int index) = 0; + virtual std::size_t GetMaps(unsigned int index) = 0; + virtual std::size_t GetSamples(unsigned int index) = 0; + virtual unsigned int LoadFile(std::string path) = 0; + + virtual bool CopySample(const unsigned int source, const std::size_t source_sample, + Tensor& target, const std::size_t target_sample) = 0; + + virtual unsigned int GetTensorCount() = 0; + + static TensorStream* FromFile(std::string path); +}; + +} + +#endif diff --git a/include/private/CLHelper.h b/include/private/CLHelper.h index 0d4619f..ae4eb4d 100644 --- a/include/private/CLHelper.h +++ b/include/private/CLHelper.h @@ -64,6 +64,8 @@ class CLHelper { static cl_kernel k_sms; static cl_kernel k_im2col; static cl_kernel k_col2im; + static cl_kernel k_up; + static cl_kernel k_down; #endif }; diff --git a/kernels/scaling.cl b/kernels/scaling.cl new file mode 100644 index 0000000..f81c1e8 --- /dev/null +++ b/kernels/scaling.cl @@ -0,0 +1,69 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project.
+ */ +__kernel void DOWN ( __global float* X, + __global float* Y, + uint target_width, + uint target_height, + uint source_width, + uint source_height, + uint region_width, + uint region_height, + float target_factor) +{ + uint target_x = get_global_id(0); + uint target_y = get_global_id(1); + uint target_skid = get_global_id(2); + + uint source_x = target_x * region_width; + uint source_y = target_y * region_height; + + uint X_sk = source_width * source_height * target_skid; + + float sum = 0.0; + for(uint ry = 0; ry < region_height; ry++) { + const uint X_line = X_sk + (source_width * (source_y + ry)); + for(uint rx = 0; rx < region_width; rx++) { + const uint X_idx = X_line + source_x + rx; + const float X_val = X[X_idx]; + sum += X_val; + } + } + + uint Y_sk = target_width * target_height * target_skid; + uint Y_line = Y_sk + (target_width * target_y); + uint Y_idx = Y_line + target_x; + Y[Y_idx] = sum * target_factor; +} + + +__kernel void UP ( __global float* X, + __global float* Y, + uint target_width, + uint target_height, + uint source_width, + uint source_height, + uint region_width, + uint region_height, + float target_factor) +{ + uint target_x = get_global_id(0); + uint target_y = get_global_id(1); + uint target_skid = get_global_id(2); + + uint source_x = target_x / region_width; + uint source_y = target_y / region_height; + + uint X_sk = source_width * source_height * target_skid; + const uint X_line = X_sk + (source_width * source_y); + const uint X_idx = X_line + source_x; + const float X_val = X[X_idx]; + + uint Y_sk = target_width * target_height * target_skid; + uint Y_line = Y_sk + (target_width * target_y); + uint Y_idx = Y_line + target_x; + Y[Y_idx] = X_val * target_factor; +} \ No newline at end of file diff --git a/scripts/runexperiments.sh b/scripts/runexperiments.sh index 5acbd9b..0adf25d 100755 --- a/scripts/runexperiments.sh +++ b/scripts/runexperiments.sh @@ -9,7 +9,7 @@ mkdir tmp 2&> /dev/null mkdir logs 2&> /dev/null mkdir csv 2&> /dev/null -echo "Running $ITERATIONS iterations ($EPOCHS epochs each) of network $NETFILE on dataset $DATASET..." +echo "Running $ITERATIONS iterations ($EPOCHS epochs each, testing every 10th epoch) of network $NETFILE on dataset $DATASET..." 
SIGNATURE=$(basename "$DATASET")_$(basename "$NETFILE")_${EPOCHS}_${ITERATIONS}_$TIMESTAMP SCRFILE=tmp/scr_$SIGNATURE @@ -17,7 +17,8 @@ LOGFILE=tmp/log_$SIGNATURE OLOGFILE=logs/log_$SIGNATURE CSVFILE=csv/csv_$SIGNATURE -echo "reset" > $SCRFILE +echo "set experiment name=$SIGNATURE" > $SCRFILE +echo "reset" >> $SCRFILE echo "set epoch=0" >> $SCRFILE for i in $(seq 1 $ITERATIONS) @@ -25,6 +26,9 @@ do for j in $(seq 1 $EPOCHS) do MODELFILE=tmp/model_${SIGNATURE}_i${i}_j$j + echo "tstat enable=0" >> $SCRFILE + echo "train epochs=9" >> $SCRFILE + echo "tstat enable=1" >> $SCRFILE echo "train" >> $SCRFILE echo "save file=$MODELFILE" >> $SCRFILE echo "test" >> $SCRFILE @@ -34,10 +38,10 @@ do done -./trainNetwork $DATASET $NETFILE $SCRFILE 2&> $LOGFILE +./trainNetwork -v $DATASET $NETFILE $SCRFILE &> $LOGFILE mv $LOGFILE $OLOGFILE echo "Training done, output log file: $OLOGFILE" -./logtocsv_multiclass.sh $OLOGFILE > $CSVFILE -echo "Output CSV file: $CSVFILE" +#./logtocsv_multiclass.sh $OLOGFILE > $CSVFILE +#echo "Output CSV file: $CSVFILE" diff --git a/scripts/runexperimentswithmodel.sh b/scripts/runexperimentswithmodel.sh new file mode 100755 index 0000000..de907ba --- /dev/null +++ b/scripts/runexperimentswithmodel.sh @@ -0,0 +1,47 @@ +#!/bin/bash +DATASET=$1 +NETFILE=$2 +EPOCHS=$3 +ITERATIONS=$4 +MODEL=$5 +TIMESTAMP=`date +%s` + +mkdir tmp &> /dev/null +mkdir logs &> /dev/null +mkdir csv &> /dev/null + +echo "Running $ITERATIONS iterations ($EPOCHS * 10 epochs each, testing every 10th epoch) of network $NETFILE on dataset $DATASET, loading model $MODEL..." + +SIGNATURE=$(basename "$DATASET")_$(basename "$NETFILE")_model${MODEL}_${EPOCHS}_${ITERATIONS}_$TIMESTAMP +SCRFILE=tmp/scr_$SIGNATURE +LOGFILE=tmp/log_$SIGNATURE +OLOGFILE=logs/log_$SIGNATURE +CSVFILE=csv/csv_$SIGNATURE + +echo "set experiment name=$SIGNATURE" > $SCRFILE +echo "reset" >> $SCRFILE +echo "load file=$MODEL" >> $SCRFILE +echo "set epoch=0" >> $SCRFILE + +for i in $(seq 1 $ITERATIONS) do + for j in $(seq 1 $EPOCHS) do + MODELFILE=tmp/model_${SIGNATURE}_i${i}_j$j + echo "tstat enable=0" >> $SCRFILE + echo "train epochs=9" >> $SCRFILE + echo "tstat enable=1" >> $SCRFILE + echo "train" >> $SCRFILE + echo "save file=$MODELFILE" >> $SCRFILE + echo "test" >> $SCRFILE + done + echo "reset" >> $SCRFILE + echo "load file=$MODEL" >> $SCRFILE + echo "set epoch=0" >> $SCRFILE done + + +./trainNetwork -v $DATASET $NETFILE $SCRFILE &> $LOGFILE + +mv $LOGFILE $OLOGFILE +echo "Training done, output log file: $OLOGFILE" diff --git a/src/factory/ConfigurableFactory.cpp b/src/factory/ConfigurableFactory.cpp index f62efcf..e8ab9b1 100644 --- a/src/factory/ConfigurableFactory.cpp +++ b/src/factory/ConfigurableFactory.cpp @@ -13,10 +13,12 @@ #include "ResizeLayer.h" #include "MaxPoolingLayer.h" #include "AdvancedMaxPoolingLayer.h" +#include "InputDownSamplingLayer.h" #include "NonLinearityLayer.h" #include "UpscaleLayer.h" #include "SpatialPriorLayer.h" #include "ConcatenationLayer.h" +#include "SumLayer.h" #include "ConfigParsing.h" #include "NetGraph.h" @@ -103,6 +105,14 @@ ConfigurableFactory::ConfigurableFactory (std::istream& file, const unsigned int factory *= ky; } + if (StartsWithIdentifier (line, "downsampling")) { + unsigned int kx, ky; + ParseKernelSizeIfPossible (line, "size", kx, ky); + LOGDEBUG << "Adding down-sampling layer to receptive field (" << kx << "," << ky << ")"; + factorx *= kx; + factory *= ky; + } + if (StartsWithIdentifier (line, "amaxpooling")) { unsigned int kx, ky, sx, sy; ParseKernelSizeIfPossible (line,
"size", kx, ky); @@ -116,6 +126,9 @@ ConfigurableFactory::ConfigurableFactory (std::istream& file, const unsigned int } } } + + LOGDEBUG << "To achieve this receptive field size manually, start net config with manual rfx=" << receptive_field_x_ + << " rfy=" << receptive_field_y_ << " factorx=" << factorx << " factory=" << factory; if (method_ == PATCH) { receptive_field_x_ += factorx; @@ -479,11 +492,11 @@ bool ConfigurableFactory::AddLayers(NetGraph& net, NetGraphConnection data_layer } if (line.compare(0, 4, "popa") == 0) { - last_connection = stack_a[stack_a_pos]; + last_connection = stack_a[stack_a_pos--]; } if (line.compare(0, 4, "popb") == 0) { - last_connection = stack_b[stack_b_pos]; + last_connection = stack_b[stack_b_pos--]; } /* @@ -555,6 +568,19 @@ bool ConfigurableFactory::AddLayers(NetGraph& net, NetGraphConnection data_layer last_connection.backprop = true; } + if (StartsWithIdentifier (line, "downsampling")) { + unsigned int kx = 1, ky = 1; + ParseKernelSizeIfPossible (line, "size", kx, ky); + + InputDownSamplingLayer* mp = new InputDownSamplingLayer (kx, ky); + + NetGraphNode* node = new NetGraphNode(mp, last_connection); + net.AddNode(node); + last_connection.buffer = 0; + last_connection.node = node; + last_connection.backprop = false; + } + if (StartsWithIdentifier (line, "amaxpooling")) { unsigned int kx = 1, ky = 1, sx, sy; ParseKernelSizeIfPossible (line, "size", kx, ky); @@ -644,6 +670,46 @@ bool ConfigurableFactory::AddLayers(NetGraph& net, NetGraphConnection data_layer last_connection.node = node; last_connection.backprop = true; } + + if (StartsWithIdentifier(line, "sum")){ + std::string stack_name; + NetGraphConnection* stack_ptr; + int stack_pos; + ParseStringParamIfPossible(line, "stack", stack_name); + if (stack_name.compare(0, 1, "b") == 0){ + stack_ptr = stack_b; + stack_pos = stack_b_pos; + } + else { + stack_ptr = stack_a; + stack_pos = stack_a_pos; + } + SumLayer* l = new SumLayer(); + NetGraphNode* node = new NetGraphNode(l); + for (int p = stack_pos; p >= 0; p--) { + node->input_connections.push_back(stack_ptr[p]); + } + net.AddNode(node); + last_connection.buffer = 0; + last_connection.node = node; + last_connection.backprop = true; + } + + if (StartsWithIdentifier(line, "upscale")){ + unsigned int ufx = 1, ufy = 1; + unsigned int o = 0; + ParseKernelSizeIfPossible(line, "factor", ufx, ufy); + ParseCountIfPossible(line, "is_output", o); + UpscaleLayer* l = new UpscaleLayer(ufx, ufy); + NetGraphNode* node = new NetGraphNode(l, last_connection); + node->is_output = (o == 1); + net.AddNode(node); + last_connection.buffer = 0; + last_connection.node = node; + last_connection.backprop = true; + is_output = (o == 1); + already_upscaled = true; + } if (is_output && !already_upscaled && method_ == FCN && (factorx != 1 || factory != 1)) { UpscaleLayer* l = new UpscaleLayer(factorx, factory); diff --git a/src/factory/SkipLayerNetworkFactory.cpp b/src/factory/SkipLayerNetworkFactory.cpp new file mode 100644 index 0000000..2e49b44 --- /dev/null +++ b/src/factory/SkipLayerNetworkFactory.cpp @@ -0,0 +1,69 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. 
+ */ +#include + +#include "ErrorLayer.h" + +#include "ConvolutionLayer.h" +#include "LocalResponseNormalizationLayer.h" +#include "ResizeLayer.h" +#include "MaxPoolingLayer.h" +#include "AdvancedMaxPoolingLayer.h" +#include "InputDownSamplingLayer.h" +#include "NonLinearityLayer.h" +#include "UpscaleLayer.h" +#include "SpatialPriorLayer.h" +#include "ConcatenationLayer.h" +#include "ConfigParsing.h" +#include "NetGraph.h" + +#include "SkipLayerNetworkFactory.h" + +namespace Conv { + +bool SkipLayerNetworkFactory::AddLayers(NetGraph& graph, NetGraphConnection data_layer_connection, const unsigned int output_classes, bool add_loss_layer) +{ + return false; +} + +int SkipLayerNetworkFactory::AddLayers(Net& net, Connection data_layer_connection, const unsigned int output_classes, bool add_loss_layer, std::ostream& graph_output) +{ + return 0; +} + +Layer* SkipLayerNetworkFactory::CreateLossLayer(const unsigned int output_classes, const datum loss_weight) +{ + return nullptr; +} + +void SkipLayerNetworkFactory::InitOptimalSettings() +{ + +} + +Method SkipLayerNetworkFactory::method() const +{ + return Method::FCN; +} + +TrainerSettings SkipLayerNetworkFactory::optimal_settings() const +{ + TrainerSettings s; + return s; +} + +int SkipLayerNetworkFactory::patchsizex() +{ + return 0; +} + +int SkipLayerNetworkFactory::patchsizey() +{ + return 0; +} + +} diff --git a/src/math/TensorMath.cpp b/src/math/TensorMath.cpp index 7d7cb25..02f0999 100644 --- a/src/math/TensorMath.cpp +++ b/src/math/TensorMath.cpp @@ -451,4 +451,180 @@ void TensorMath::SMS(const Tensor& source, Tensor& target) target.hint_ignore_content_ = false; } +void TensorMath::DOWN(const Tensor& source, Tensor& target, const int region_width, const int region_height, const datum target_factor) +{ +#ifdef BUILD_OPENCL + if(source.cl_gpu_ || target.cl_gpu_) { + ((Tensor&)source).MoveToGPU(); + target.MoveToGPU(true); + const int target_width = target.width(); + const int target_height = target.height(); + const int source_width = source.width(); + const int source_height = source.height(); + const int maps = target.maps(); + const int samples = target.samples(); + cl_uint error = 0; + + error |= clSetKernelArg (CLHelper::k_down, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_)); + error |= clSetKernelArg (CLHelper::k_down, 1, sizeof (cl_mem), &(target.cl_data_ptr_)); + error |= clSetKernelArg (CLHelper::k_down, 2, sizeof (cl_uint), &target_width); + error |= clSetKernelArg (CLHelper::k_down, 3, sizeof (cl_uint), &target_height); + error |= clSetKernelArg (CLHelper::k_down, 4, sizeof (cl_uint), &source_width); + error |= clSetKernelArg (CLHelper::k_down, 5, sizeof (cl_uint), &source_height); + error |= clSetKernelArg (CLHelper::k_down, 6, sizeof (cl_uint), &region_width); + error |= clSetKernelArg (CLHelper::k_down, 7, sizeof (cl_uint), &region_height); + error |= clSetKernelArg (CLHelper::k_down, 8, sizeof (cl_float), &target_factor); + + if (error != CL_SUCCESS) { + FATAL("Error setting kernel args: " << (signed int) error); + } + + size_t global_work_size[] = {(size_t)target.width(), (size_t)target.height(), (size_t)(target.maps() * target.samples())}; + + error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_down, 3, NULL, + global_work_size, NULL, 0, NULL, NULL); + if (error != CL_SUCCESS) { + FATAL("Error enqueueing kernel: " << (signed int) error); + } + +#ifdef BRUTAL_FINISH + error = clFinish (CLHelper::queue); + if (error != CL_SUCCESS) { + FATAL("Error finishing command queue: " << (signed int) error); + } +#endif + }
else { +#endif + const int target_width = target.width(); + const int target_height = target.height(); + const int maps = target.maps(); + const int samples = target.samples(); + for(int sample = 0; sample < samples; sample++) { + for(int map = 0; map < maps; map++) { + for(unsigned int target_y = 0; target_y < target_height; target_y++) { + const unsigned int source_y = region_height * target_y; + for(unsigned int target_x = 0; target_x < target_width; target_x++) { + const unsigned int source_x = region_width * target_x; + datum sum = 0; + for(unsigned int ry = 0; ry < region_height; ry++) { + for(unsigned int rx = 0; rx < region_width; rx++) { + const datum* src = source.data_ptr_const(source_x + rx, source_y + ry, map, sample); + sum += *src; + } + } + datum* tgt = target.data_ptr(target_x, target_y, map, sample); + *tgt = sum * target_factor; + } + } + } + } + +#ifdef BUILD_OPENCL + } +#endif + + target.hint_ignore_content_ = false; +} + +void TensorMath::UP(const Tensor& source, Tensor& target, const int region_width, const int region_height, const datum target_factor) +{ +#ifdef BUILD_OPENCL + if(source.cl_gpu_ || target.cl_gpu_) { + ((Tensor&)source).MoveToGPU(); + target.MoveToGPU(true); + const int target_width = target.width(); + const int target_height = target.height(); + const int source_width = source.width(); + const int source_height = source.height(); + const int maps = target.maps(); + const int samples = target.samples(); + cl_uint error = 0; + + error |= clSetKernelArg (CLHelper::k_up, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_)); + error |= clSetKernelArg (CLHelper::k_up, 1, sizeof (cl_mem), &(target.cl_data_ptr_)); + error |= clSetKernelArg (CLHelper::k_up, 2, sizeof (cl_uint), &target_width); + error |= clSetKernelArg (CLHelper::k_up, 3, sizeof (cl_uint), &target_height); + error |= clSetKernelArg (CLHelper::k_up, 4, sizeof (cl_uint), &source_width); + error |= clSetKernelArg (CLHelper::k_up, 5, sizeof (cl_uint), &source_height); + error |= clSetKernelArg (CLHelper::k_up, 6, sizeof (cl_uint), &region_width); + error |= clSetKernelArg (CLHelper::k_up, 7, sizeof (cl_uint), &region_height); + error |= clSetKernelArg (CLHelper::k_up, 8, sizeof (cl_float), &target_factor); + + if (error != CL_SUCCESS) { + FATAL("Error setting kernel args: " << (signed int) error); + } + + size_t global_work_size[] = {(size_t)target.width(), (size_t)target.height(), (size_t)(target.maps() * target.samples())}; + + error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_up, 3, NULL, + global_work_size, NULL, 0, NULL, NULL); + if (error != CL_SUCCESS) { + FATAL("Error enqueueing kernel: " << (signed int) error); + } + +#ifdef BRUTAL_FINISH + error = clFinish (CLHelper::queue); + if (error != CL_SUCCESS) { + FATAL("Error finishing command queue: " << (signed int) error); + } +#endif + } else { +#endif + const datum region_area = (datum)region_width * (datum)region_height; + const int width = source.width(); + const int height = source.height(); + const int maps = source.maps(); + const int samples = source.samples(); + for(int sample = 0; sample < samples; sample++) { + for(int map = 0; map < maps; map++) { + for(unsigned int y = 0; y < height; y++) { + const unsigned int iy = region_height * y; + for(unsigned int x = 0; x < width; x++) { + const unsigned int ix = region_width * x; + const datum* src = source.data_ptr_const(x, y, map, sample); + datum sum = *src; + for(unsigned int ry = 0; ry < region_height; ry++) { + for(unsigned int rx = 0; rx < region_width; rx++) { + datum* tgt
= target.data_ptr(ix + rx, iy + ry, map, sample); + *tgt = sum * target_factor; + } + } + } + } + } + } +#ifdef BUILD_OPENCL + } +#endif + + target.hint_ignore_content_ = false; +} + +void TensorMath::ADD(const Tensor& source_a, const Tensor& source_b, Tensor& target) +{ +#ifdef BUILD_OPENCL + ((Tensor&)source_a).MoveToCPU(); + ((Tensor&)source_b).MoveToCPU(); + target.MoveToCPU(true); +#endif + if((source_a.samples() != source_b.samples()) + || (source_b.samples() != target.samples()) + || (source_a.elements() != source_b.elements()) + || (source_b.elements() != target.elements())) { + FATAL("Dimensions don't match!"); + } + + #pragma omp parallel for default(shared) + for(unsigned int element = 0; element < source_a.elements(); element++) { + const datum* source_a_ptr = &(source_a.data_ptr_const()[element]); + const datum* source_b_ptr = &(source_b.data_ptr_const()[element]); + datum* target_ptr = &(target.data_ptr()[element]); + + *target_ptr = *source_a_ptr + *source_b_ptr; + } + + target.hint_ignore_content_ = false; +} + + } diff --git a/src/net/BinaryStatLayer.cpp b/src/net/BinaryStatLayer.cpp index 895aa8a..d286f5f 100644 --- a/src/net/BinaryStatLayer.cpp +++ b/src/net/BinaryStatLayer.cpp @@ -6,6 +6,7 @@ */ #include "Log.h" #include "Init.h" +#include "StatAggregator.h" #include "BinaryStatLayer.h" @@ -37,6 +38,132 @@ BinaryStatLayer::BinaryStatLayer ( const unsigned int thresholds, } Reset(); + + // Initialize stat descriptors + stat_fpr_ = new StatDescriptor; + stat_fnr_ = new StatDescriptor; + stat_pre_ = new StatDescriptor; + stat_rec_ = new StatDescriptor; + stat_acc_ = new StatDescriptor; + stat_f1_ = new StatDescriptor; + + stat_fpr_->description = "False Positive Rate"; + stat_fpr_->unit = "%"; + stat_fpr_->nullable = true; + stat_fpr_->init_function = [this] (Stat& stat) { stat.is_null = true; stat.value = 0; Reset(); }; + stat_fpr_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_fpr_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + stat_fnr_->description = "False Negative Rate"; + stat_fnr_->unit = "%"; + stat_fnr_->nullable = true; + stat_fnr_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_fnr_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_fnr_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + stat_pre_->description = "Precision"; + stat_pre_->unit = "%"; + stat_pre_->nullable = true; + stat_pre_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_pre_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_pre_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + stat_rec_->description = "Recall"; + stat_rec_->unit = "%"; + stat_rec_->nullable = true; + stat_rec_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_rec_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_rec_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + stat_acc_->description = "Accuracy"; + stat_acc_->unit = "%"; + stat_acc_->nullable = true; + stat_acc_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_acc_->update_function = [] (Stat& stat, double user_value) { 
stat.is_null = false; stat.value = user_value; }; + stat_acc_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + stat_f1_->description = "F1 Value"; + stat_f1_->unit = "%"; + stat_f1_->nullable = true; + stat_f1_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_f1_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_f1_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { return stat; }; + + // Register stats + System::stat_aggregator->RegisterStat(stat_fpr_); + System::stat_aggregator->RegisterStat(stat_fnr_); + System::stat_aggregator->RegisterStat(stat_pre_); + System::stat_aggregator->RegisterStat(stat_rec_); + System::stat_aggregator->RegisterStat(stat_acc_); + System::stat_aggregator->RegisterStat(stat_f1_); +} + +void BinaryStatLayer::UpdateAll() { + // Calculate metrics + datum fmax = -2; + unsigned int tfmax = -1; + + for ( unsigned int t = 0; t < thresholds_; t++ ) { + datum precision = -1; + datum recall = -1; + datum f1 = -1; + + if ( ( true_positives_[t] + false_positives_[t] ) > 0 ) + precision = ( true_positives_[t] ) / + ( true_positives_[t] + false_positives_[t] ); + + if ( ( true_positives_[t] + false_negatives_[t] ) > 0 ) + recall = ( true_positives_[t] ) / + ( true_positives_[t] + false_negatives_[t] ); + + if ( precision >= 0 && recall >= 0 ) { + f1 = 2 * precision * recall / ( precision + recall ); + } + + if ( f1 > fmax ) { + fmax = f1; + tfmax = t; + } + } + + datum fpr = -1; + datum fnr = -1; + datum precision = -1; + datum recall = -1; + datum f1 = -1; + datum acc = -1; + + if ( ( true_positives_[tfmax] + false_positives_[tfmax] ) > 0 ) + precision = ( true_positives_[tfmax] ) / + ( true_positives_[tfmax] + false_positives_[tfmax] ); + + if ( ( true_positives_[tfmax] + false_negatives_[tfmax] ) > 0 ) + recall = ( true_positives_[tfmax] ) / + ( true_positives_[tfmax] + false_negatives_[tfmax] ); + + if ( ( false_positives_[tfmax] + true_negatives_[tfmax] ) > 0 ) + fpr = ( false_positives_[tfmax] ) / + ( false_positives_[tfmax] + true_negatives_[tfmax] ); + + if ( ( true_positives_[tfmax] + false_negatives_[tfmax] ) > 0 ) + fnr = ( false_negatives_[tfmax] ) / + ( true_positives_[tfmax] + false_negatives_[tfmax] ); + + if ( precision >= 0 && recall >= 0 ) + f1 = 2 * precision * recall / ( precision + recall ); + + acc = ( true_positives_[tfmax] + true_negatives_[tfmax] ) / + ( true_positives_[tfmax] + true_negatives_[tfmax] + + false_negatives_[tfmax] + false_positives_[tfmax] + ); + + // Update stats + if(fpr >= 0) System::stat_aggregator->Update(stat_fpr_->stat_id, 100.0 * fpr); + if(fnr >= 0) System::stat_aggregator->Update(stat_fnr_->stat_id, 100.0 * fnr); + if(precision >= 0) System::stat_aggregator->Update(stat_pre_->stat_id, 100.0 * precision); + if(recall >= 0) System::stat_aggregator->Update(stat_rec_->stat_id, 100.0 * recall); + if(acc >= 0) System::stat_aggregator->Update(stat_acc_->stat_id, 100.0 * acc); + if(f1 >= 0) System::stat_aggregator->Update(stat_f1_->stat_id, 100.0 * f1); } bool BinaryStatLayer::CreateOutputs ( const std::vector< CombinedTensor* >& inputs, std::vector< CombinedTensor* >& outputs ) { @@ -141,85 +268,7 @@ void BinaryStatLayer::Reset() { } void BinaryStatLayer::Print ( std::string prefix, bool training ) { - datum fmax = -2; - unsigned int tfmax = -1; - - for ( unsigned int t = 0; t < thresholds_; t++ ) { - datum precision = -1; - datum recall = -1; - datum f1 = -1; - - if ( ( 
true_positives_[t] + false_positives_[t] ) > 0 ) - precision = ( true_positives_[t] ) / - ( true_positives_[t] + false_positives_[t] ); - - if ( ( true_positives_[t] + false_negatives_[t] ) > 0 ) - recall = ( true_positives_[t] ) / - ( true_positives_[t] + false_negatives_[t] ); - - /*acc = ( true_positives_[t] + true_negatives_[t] ) / - ( true_positives_[t] + true_negatives_[t] + - false_negatives_[t] + false_positives_[t] - ); - - LOGDEBUG << "Accuracy (" << threshold_values_[t] << "): " << acc;*/ - - if ( precision >= 0 && recall >= 0 ) { - f1 = 2 * precision * recall / ( precision + recall ); - } - - if ( f1 > fmax ) { - fmax = f1; - tfmax = t; - } - } - - datum fpr = -1; - datum fnr = -1; - datum precision = -1; - datum recall = -1; - datum f1 = -1; - datum acc = -1; - - if ( ( true_positives_[tfmax] + false_positives_[tfmax] ) > 0 ) - precision = ( true_positives_[tfmax] ) / - ( true_positives_[tfmax] + false_positives_[tfmax] ); - - if ( ( true_positives_[tfmax] + false_negatives_[tfmax] ) > 0 ) - recall = ( true_positives_[tfmax] ) / - ( true_positives_[tfmax] + false_negatives_[tfmax] ); - - if ( ( false_positives_[tfmax] + true_negatives_[tfmax] ) > 0 ) - fpr = ( false_positives_[tfmax] ) / - ( false_positives_[tfmax] + true_negatives_[tfmax] ); - - if ( ( true_positives_[tfmax] + false_negatives_[tfmax] ) > 0 ) - fnr = ( false_negatives_[tfmax] ) / - ( true_positives_[tfmax] + false_negatives_[tfmax] ); - - if ( precision >= 0 && recall >= 0 ) - f1 = 2 * precision * recall / ( precision + recall ); - - acc = ( true_positives_[tfmax] + true_negatives_[tfmax] ) / - ( true_positives_[tfmax] + true_negatives_[tfmax] + - false_negatives_[tfmax] + false_positives_[tfmax] - ); - - ( training ? LOGTRESULT : LOGRESULT ) - << prefix << " F1 : " << f1 * 100.0 << "% (t=" << threshold_values_[tfmax] - << ")" << LOGRESULTEND; - ( training ? LOGTRESULT : LOGRESULT ) - << prefix << " ACC: " << acc * 100.0 << "%" << LOGRESULTEND; - ( training ? LOGTRESULT : LOGRESULT ) - << prefix << " PRE: " << precision * 100.0 << "%" << LOGRESULTEND; - ( training ? LOGTRESULT : LOGRESULT ) - << prefix << " REC: " << recall * 100.0 << "%" << LOGRESULTEND; - ( training ? LOGTRESULT : LOGRESULT ) - << prefix << " FPR: " << fpr * 100.0 << "%" << LOGRESULTEND; - ( training ? 
LOGTRESULT : LOGRESULT ) - << prefix << " FNR: " << fnr * 100.0 << "%" << LOGRESULTEND; - - + // Now deprecated } diff --git a/src/net/ConfusionMatrixLayer.cpp b/src/net/ConfusionMatrixLayer.cpp index 27c2f7a..1f189fa 100644 --- a/src/net/ConfusionMatrixLayer.cpp +++ b/src/net/ConfusionMatrixLayer.cpp @@ -8,12 +8,15 @@ #include <iomanip> #include <sstream> #include <cmath> +#include "../util/StatAggregator.h" + #include "ConfusionMatrixLayer.h" namespace Conv { ConfusionMatrixLayer::ConfusionMatrixLayer ( std::vector<std::string> names, const unsigned int classes ) : - classes_ ( classes ), names_ ( names ) { + classes_ ( classes ), names_ ( names ) +{ LOGDEBUG << "Instance created, " << classes << " classes."; for(unsigned int n = 0; n < names_.size(); n++) { if(names_[n].length() > 11) { @@ -21,6 +24,91 @@ ConfusionMatrixLayer::ConfusionMatrixLayer ( names_[n] = original.substr(0,8) + "..."; } } + // Initialize stat descriptors + stat_orr_ = new StatDescriptor; + stat_arr_ = new StatDescriptor; + stat_iou_ = new StatDescriptor; + + stat_orr_->description = "Overall Recognition Rate"; + stat_orr_->unit = "%"; + stat_orr_->nullable = true; + stat_orr_->init_function = [this] (Stat& stat) { stat.is_null = true; stat.value = 0; Reset();}; + stat_orr_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_orr_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { + return stat; + }; + + stat_arr_->description = "Average Recognition Rate"; + stat_arr_->unit = "%"; + stat_arr_->nullable = true; + stat_arr_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_arr_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_arr_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { + return stat; + }; + + stat_iou_->description = "Average Intersection over Union"; + stat_iou_->unit = "%"; + stat_iou_->nullable = true; + stat_iou_->init_function = [] (Stat& stat) { stat.is_null = true; stat.value = 0; }; + stat_iou_->update_function = [] (Stat& stat, double user_value) { stat.is_null = false; stat.value = user_value; }; + stat_iou_->output_function = [] (HardcodedStats& hc_stats, Stat& stat) -> Stat { + return stat; + }; + + // Register with StatAggregator + System::stat_aggregator->RegisterStat(stat_orr_); + System::stat_aggregator->RegisterStat(stat_arr_); + System::stat_aggregator->RegisterStat(stat_iou_); +} + +void ConfusionMatrixLayer::UpdateAll() { + // Don't call Update(...) when there are no samples to keep the null property of the value + if (total_ < 1.0) + return; + + long double orr = 0, arr = 0, iou = 0; + + // Calculate metrics... + + // Overall recognition rate + orr = 100.0L * right_ / total_; + + // Average recognition rate + long double ccount = 0; + long double sum = 0; + + for ( unsigned int c = 0; c < classes_; c++ ) { + if ( per_class_[c] > 0 ) { + sum += matrix_[ ( c * classes_ ) + c] / per_class_[c]; + ccount += 1.0L; + } + } + + arr = 100.0L * sum / ccount; + + // Intersection over union + long double IU_sum = 0; + for(unsigned int t = 0; t < classes_; t++) { + // Calculate IU measure for class T + long double unionn = 0; + for(unsigned int c = 0; c < classes_; c++) { + if(c!=t) { + unionn += matrix_[ ( t * classes_ ) + c]; + unionn += matrix_[ ( c * classes_ ) + t]; + } + } + unionn += matrix_[ ( t * classes_) + t]; + long double IU = (unionn > 0.0) ? (matrix_[ ( t * classes_) + t] / unionn) : 0.0; + IU_sum += IU; + } + + iou = 100.0L * IU_sum / (long double)classes_; + + // Submit metrics to StatAggregator + System::stat_aggregator->Update(stat_orr_->stat_id, (double)orr); + System::stat_aggregator->Update(stat_arr_->stat_id, (double)arr); + System::stat_aggregator->Update(stat_iou_->stat_id, (double)iou); }
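For intuition, the per-class IU above reduces to true positives over (true positives + false positives + false negatives). A standalone sketch on an invented 2x2 confusion matrix; the counts are hypothetical, only the loop mirrors UpdateAll:

#include <cstdio>

int main() {
  // Rows: actual class, columns: predicted class (invented counts)
  long double matrix[2][2] = {{50, 10}, {5, 35}};
  long double IU_sum = 0;
  for (int t = 0; t < 2; t++) {
    long double unionn = matrix[t][t];  // intersection, counted once
    for (int c = 0; c < 2; c++)
      if (c != t) unionn += matrix[t][c] + matrix[c][t];  // both error directions
    IU_sum += (unionn > 0.0) ? matrix[t][t] / unionn : 0.0;
  }
  // (50/65 + 35/50) / 2 = 73.46%
  printf("Average intersection over union: %.2Lf%%\n", 100.0L * IU_sum / 2.0L);
  return 0;
}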
bool ConfusionMatrixLayer::CreateOutputs ( @@ -128,6 +216,7 @@ void ConfusionMatrixLayer::Reset() { } void ConfusionMatrixLayer::Print ( std::string prefix, bool training ) { + // Print confusion matrix std::stringstream caption; caption << std::setw ( 12 ) << "vCLS ACT>"; @@ -151,24 +240,9 @@ void ConfusionMatrixLayer::Print ( std::string prefix, bool training ) { caption.str ( "" ); } - - (training?LOGTRESULT:LOGRESULT) << prefix << " Overall recognition rate (not normalized): " - << 100.0L * right_ / total_ << "%"; - - long double ccount = 0; - long double sum = 0; - - for ( unsigned int c = 0; c < classes_; c++ ) { - if ( per_class_[c] > 0 ) { - sum += matrix_[ ( c * classes_ ) + c] / per_class_[c]; - ccount += 1.0L; - } - } - - (training?LOGTRESULT:LOGRESULT) << prefix << " Average recognition rate (normalized) : " - << 100.0 * sum / ccount << "%" << LOGRESULTEND; - + // Print IOU long double IU_sum = 0; + for(unsigned int t = 0; t < classes_; t++) { // Calculate IU measure for class T long double unionn = 0; @@ -183,14 +257,14 @@ void ConfusionMatrixLayer::Print ( std::string prefix, bool training ) { IU_sum += IU; caption << std::setw(12) << names_[t]; caption << " IU: "; - caption << std::setw(12) << IU * 100.0; + caption << std::setw(12) << IU * 100.0L; caption << "%"; (training?LOGTRESULT:LOGRESULT) << prefix << caption.str() << LOGRESULTEND; caption.str ( "" ); } (training?LOGTRESULT:LOGRESULT) << prefix << " Average intersection over union : " - << 100.0 * IU_sum / (long double)classes_ << "%" << LOGRESULTEND; + << 100.0L * IU_sum / (long double)classes_ << "%" << LOGRESULTEND; } ConfusionMatrixLayer::~ConfusionMatrixLayer() { diff --git a/src/net/InputDownSamplingLayer.cpp b/src/net/InputDownSamplingLayer.cpp new file mode 100644 index 0000000..4b08b61 --- /dev/null +++ b/src/net/InputDownSamplingLayer.cpp @@ -0,0 +1,98 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project.
+ */ + +#include "Log.h" +#include "TensorMath.h" + +#include "InputDownSamplingLayer.h" + +namespace Conv { + +InputDownSamplingLayer::InputDownSamplingLayer (const unsigned int region_width, + const unsigned int region_height) : + region_width_ (region_width), region_height_ (region_height) { + LOGDEBUG << "Instance created: " << region_width_ << "x" << region_height_ << + " pooling."; +} + +bool InputDownSamplingLayer::CreateOutputs ( + const std::vector< CombinedTensor* >& inputs, + std::vector< CombinedTensor* >& outputs) { + // This is a simple layer, only one input + if (inputs.size() != 1) { + LOGERROR << "Only one input supported!"; + return false; + } + + // Save input node pointer + CombinedTensor* input = inputs[0]; + + // Check if input node pointer is null + if (input == nullptr) { + LOGERROR << "Null pointer input node!"; + return false; + } + + // Validate dimensions + if ( (input->data.width() % region_width_) != 0 || + (input->data.height() % region_height_) != 0) { + LOGERROR << "Input dimensions not divisible by region dimensions!"; + return false; + } + + // Create output + CombinedTensor* output = new CombinedTensor (input->data.samples(), + input->data.width() / region_width_, input->data.height() / region_height_, + input->data.maps()); + + // Tell network about the output + outputs.push_back (output); + + return true; +} + +bool InputDownSamplingLayer::Connect (const CombinedTensor* input, + CombinedTensor* output) { + // TODO Validate dimensions + bool valid = true; + + if (!valid) { + LOGERROR << "Invalid dimensions!"; + return false; + } + + // Save dimensions + input_width_ = input->data.width(); + input_height_ = input->data.height(); + output_width_ = output->data.width(); + output_height_ = output->data.height(); + + maps_ = input->data.maps(); + + return true; +} + +void InputDownSamplingLayer::FeedForward() { + TensorMath::DOWN(input_->data, output_->data, region_width_, region_height_, 1.0f / ((datum)region_width_ * (datum)region_height_)); +} + +void InputDownSamplingLayer::BackPropagate() { + if(backprop_enabled_) { + FATAL("This is a pre-processing layer that does not support backpropagation!"); + } +} + + +bool InputDownSamplingLayer::IsOpenCLAware() { +#ifdef BUILD_OPENCL_MAX + return true; +#else + return false; +#endif +} + +} diff --git a/src/net/NetGraph.cpp b/src/net/NetGraph.cpp index c7ff22c..4360d72 100644 --- a/src/net/NetGraph.cpp +++ b/src/net/NetGraph.cpp @@ -166,12 +166,12 @@ void NetGraph::PrintGraph(std::ostream& graph_output) { for (unsigned int i = 0; i < node->output_buffers.size(); i++) { if (i > 0) node_output << "|"; - node_output << "" << node->output_buffers[i].description; + node_output << "" << node->output_buffers[i].description << " " << node->output_buffers[i].combined_tensor->data; } node_output << "}"; } else if (node->output_buffers.size() == 1) { - node_output << "| " << node->output_buffers[0].description; + node_output << "| " << node->output_buffers[0].description << " " << node->output_buffers[0].combined_tensor->data; } node_output << "}\"];\n"; diff --git a/src/net/SumLayer.cpp b/src/net/SumLayer.cpp new file mode 100644 index 0000000..3d0b7fb --- /dev/null +++ b/src/net/SumLayer.cpp @@ -0,0 +1,115 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. 
+ */ + +#include + +#include "TensorMath.h" +#include "SumLayer.h" + +namespace Conv { + + +SumLayer::SumLayer() { + LOGDEBUG << "Instance created."; +} + +bool SumLayer::CreateOutputs (const std::vector< CombinedTensor* >& inputs, + std::vector< CombinedTensor* >& outputs) { + if(inputs.size() != 2) { + LOGERROR << "Needs two inputs!"; + return false; + } + + CombinedTensor* input_a = inputs[0]; + CombinedTensor* input_b = inputs[1]; + + if(input_a == nullptr || input_b == nullptr) { + LOGERROR << "Null pointer supplied"; + return false; + } + + if(input_a->data.width() != input_b->data.width() + || input_a->data.height() != input_b->data.height()) { + LOGERROR << "Dimensions don't match!"; + return false; + } + + if(input_a->data.samples() != input_b->data.samples()) { + LOGERROR << "Sample count doesn't match!"; + return false; + } + + unsigned int maps_a = input_a->data.maps(); + unsigned int maps_b = input_b->data.maps(); + + if(maps_a != maps_b) { + LOGERROR << "Map count doesn't match"; + return false; + } + + unsigned int samples = input_a->data.samples(); + CombinedTensor* output = new CombinedTensor(samples, input_a->data.width(), + input_b->data.height(), maps_a); + + outputs.push_back(output); + return true; +} + +bool SumLayer::Connect (const std::vector< CombinedTensor* >& inputs, + const std::vector< CombinedTensor* >& outputs, + const NetStatus* status ) { + if(inputs.size() != 2) { + LOGERROR << "Needs two inputs!"; + return false; + } + + if(outputs.size() != 1) { + LOGERROR << "Needs exactly one output!"; + return false; + } + + CombinedTensor* input_a = inputs[0]; + CombinedTensor* input_b = inputs[1]; + CombinedTensor* output = outputs[0]; + + if(input_a == nullptr || input_b == nullptr || output == nullptr) { + LOGERROR << "Null pointer supplied"; + return false; + } + + if(input_a->data.samples() != input_b->data.samples()) { + LOGERROR << "Sample count doesn't match!"; + return false; + } + + if((output->data.elements() != input_a->data.elements()) + || (output->data.elements() != input_b->data.elements())) { + LOGERROR << "Wrong output dimensions!"; + return false; + } + + maps_ = input_a->data.maps(); + samples_ = input_a->data.samples(); + + input_a_ = input_a; + input_b_ = input_b; + output_ = output; + + return true; +} + +void SumLayer::FeedForward() { + TensorMath::ADD(input_a_->data, input_b_->data, output_->data); +} + +void SumLayer::BackPropagate() { + for(unsigned int sample = 0; sample < samples_; sample++) { + Tensor::CopySample(output_->delta, sample, input_a_->delta, sample); + Tensor::CopySample(output_->delta, sample, input_b_->delta, sample); + } +} + +} \ No newline at end of file diff --git a/src/net/Trainer.cpp b/src/net/Trainer.cpp index 0cfec4c..f5ee335 100644 --- a/src/net/Trainer.cpp +++ b/src/net/Trainer.cpp @@ -12,66 +12,240 @@ #include "Net.h" #include "StatLayer.h" #include "CLHelper.h" +#include "StatAggregator.h" +#include "Init.h" #include "Trainer.h" + namespace Conv { - Trainer::Trainer(Conv::NetGraph& graph, TrainerSettings settings) : - graph_(graph), settings_(settings) { - LOGDEBUG << "Instance created"; +bool Trainer::stats_are_initialized_ = false; +StatDescriptor* Trainer::stat_aggloss_ = nullptr; +StatDescriptor* Trainer::stat_qp_caseA_ = nullptr; +StatDescriptor* Trainer::stat_qp_caseB_ = nullptr; +StatDescriptor* Trainer::stat_qp_caseC_ = nullptr; +StatDescriptor* Trainer::stat_qp_caseM_ = nullptr; +StatDescriptor* Trainer::stat_sps_ = nullptr; +StatDescriptor* Trainer::stat_fps_ = nullptr; + +void Trainer::InitializeStats() { + // Only
initialize stats once + if (!stats_are_initialized_) { + + stat_aggloss_ = new StatDescriptor; + stat_aggloss_->nullable = true; + stat_aggloss_->description = "Average Aggregate Loss"; + stat_aggloss_->unit = "1/pixel"; + stat_aggloss_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_aggloss_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_aggloss_->output_function = + [](HardcodedStats& hc_stats, Stat& stat) -> Stat { + Stat return_stat; return_stat.is_null = true; + if (hc_stats.iterations > 0) { + double d_iterations = (double)hc_stats.iterations; + return_stat.value = stat.value / d_iterations; + return_stat.is_null = false; + } + return return_stat; + }; + + stat_qp_caseA_ = new StatDescriptor; + stat_qp_caseA_->nullable = true; + stat_qp_caseA_->description = "QuickProp Case A Percentage"; + stat_qp_caseA_->unit = "%"; + stat_qp_caseA_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_qp_caseA_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_qp_caseA_->output_function = + [](HardcodedStats& hc_stats, Stat& stat) -> Stat { + Stat return_stat; return_stat.is_null = true; + if (hc_stats.iterations > 0 && hc_stats.weights > 0 && !stat.is_null) { + double d_iterations = (double)hc_stats.iterations; + double d_weights = (double)hc_stats.weights; + return_stat.value = 100.0 * stat.value / (d_iterations * d_weights); + return_stat.is_null = false; + } + return return_stat; + }; + + stat_qp_caseB_ = new StatDescriptor; + stat_qp_caseB_->nullable = true; + stat_qp_caseB_->description = "QuickProp Case B Percentage"; + stat_qp_caseB_->unit = "%"; + stat_qp_caseB_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_qp_caseB_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_qp_caseB_->output_function = + [](HardcodedStats& hc_stats, Stat& stat) -> Stat { + Stat return_stat; return_stat.is_null = true; + if (hc_stats.iterations > 0 && hc_stats.weights > 0 && !stat.is_null) { + double d_iterations = (double)hc_stats.iterations; + double d_weights = (double)hc_stats.weights; + return_stat.value = 100.0 * stat.value / (d_iterations * d_weights); + return_stat.is_null = false; + } + return return_stat; + }; + + stat_qp_caseC_ = new StatDescriptor; + stat_qp_caseC_->nullable = true; + stat_qp_caseC_->description = "QuickProp Case C Percentage"; + stat_qp_caseC_->unit = "%"; + stat_qp_caseC_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_qp_caseC_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_qp_caseC_->output_function = + [](HardcodedStats& hc_stats, Stat& stat) -> Stat { + Stat return_stat; return_stat.is_null = true; + if (hc_stats.iterations > 0 && hc_stats.weights > 0 && !stat.is_null) { + double d_iterations = (double)hc_stats.iterations; + double d_weights = (double)hc_stats.weights; + return_stat.value = 100.0 * stat.value / (d_iterations * d_weights); + return_stat.is_null = false; + } + return return_stat; + }; + + stat_qp_caseM_ = new StatDescriptor; + stat_qp_caseM_->nullable = true; + stat_qp_caseM_->description = "QuickProp Case M Percentage"; + stat_qp_caseM_->unit = "%"; + stat_qp_caseM_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_qp_caseM_->update_function = + 
[](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_qp_caseM_->output_function = + [](HardcodedStats& hc_stats, Stat& stat) -> Stat { + Stat return_stat; return_stat.is_null = true; + if (hc_stats.iterations > 0 && hc_stats.weights > 0 && !stat.is_null) { + double d_iterations = (double)hc_stats.iterations; + double d_weights = (double)hc_stats.weights; + return_stat.value = 100.0 * stat.value / (d_iterations * d_weights); + return_stat.is_null = false; + } + return return_stat; + }; + + stat_sps_ = new StatDescriptor; + stat_sps_->nullable = true; + stat_sps_->description = "Pixel Throughput"; + stat_sps_->unit = "pixels/s"; + stat_sps_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_sps_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_sps_->output_function = + [] (Conv::HardcodedStats& hc_stats, Conv::Stat& stat) { + Conv::Stat return_stat = stat; + return_stat.value = stat.value / hc_stats.seconds_elapsed; + return return_stat; + }; + + stat_fps_ = new StatDescriptor; + stat_fps_->nullable = true; + stat_fps_->description = "Frame Rate"; + stat_fps_->unit = "frames/s"; + stat_fps_->init_function = + [](Stat& stat) {stat.is_null = true; stat.value = 0.0;}; + stat_fps_->update_function = + [](Stat& stat, double user_value) {stat.value += user_value; stat.is_null = false;}; + stat_fps_->output_function = + [] (Conv::HardcodedStats& hc_stats, Conv::Stat& stat) { + Conv::Stat return_stat = stat; + return_stat.value = stat.value / hc_stats.seconds_elapsed; + return return_stat; + }; + + // Register stats + System::stat_aggregator->RegisterStat(stat_aggloss_); + System::stat_aggregator->RegisterStat(stat_qp_caseA_); + System::stat_aggregator->RegisterStat(stat_qp_caseB_); + System::stat_aggregator->RegisterStat(stat_qp_caseC_); + System::stat_aggregator->RegisterStat(stat_qp_caseM_); + System::stat_aggregator->RegisterStat(stat_sps_); + System::stat_aggregator->RegisterStat(stat_fps_); + stats_are_initialized_ = true; + } + + // Move lambdas with reference captures here +} - // We need a training layer to select training samples and some kind of - // loss function to minimize - if (graph_.GetTrainingNodes().size() == 0 || graph_.GetLossNodes().size() == 0) { - FATAL("Net doesn't have training layer or loss function layer!"); - } +Trainer::Trainer(Conv::NetGraph& graph, TrainerSettings settings) : + graph_(graph), settings_(settings) { + LOGDEBUG << "Instance created"; - // Ask the Net for parameters - graph_.GetParameters(parameters_); + // We need a training layer to select training samples and some kind of + // loss function to minimize + if (graph_.GetTrainingNodes().size() == 0 || graph_.GetLossNodes().size() == 0) { + FATAL("Net doesn't have training layer or loss function layer!"); + } - LOGDEBUG << "Optimizing " << parameters_.size() << " sets of parameters."; + // Ask the Net for parameters + graph_.GetParameters(parameters_); - unsigned int w = 0; + LOGDEBUG << "Optimizing " << parameters_.size() << " sets of parameters."; - for (unsigned int p = 0; p < parameters_.size(); p++) { - w += parameters_[p]->data.elements(); + unsigned int w = 0; - // Allocate Tensors for momentum - Tensor* last_delta = new Tensor(); - Tensor* last_gradient = new Tensor(); - Tensor* accumulated_gradient = new Tensor(); - last_delta->Resize (parameters_[p]->data); - last_delta->Clear(); - last_gradient->Resize (parameters_[p]->data); - last_gradient->Clear(); - 
accumulated_gradient->Resize (parameters_[p]->data); - accumulated_gradient->Clear(); + for (unsigned int p = 0; p < parameters_.size(); p++) { + w += parameters_[p]->data.elements(); - last_deltas_.push_back (last_delta); - last_gradients_.push_back (last_gradient); - accumulated_gradients_.push_back (accumulated_gradient); - } + // Allocate Tensors for momentum + Tensor* last_delta = new Tensor(); + Tensor* last_gradient = new Tensor(); + Tensor* accumulated_gradient = new Tensor(); + last_delta->Resize (parameters_[p]->data); + last_delta->Clear(); + last_gradient->Resize (parameters_[p]->data); + last_gradient->Clear(); + accumulated_gradient->Resize (parameters_[p]->data); + accumulated_gradient->Clear(); - // Outputs the number of weights - LOGDEBUG << "Weights: " << w; + last_deltas_.push_back (last_delta); + last_gradients_.push_back (last_gradient); + accumulated_gradients_.push_back (accumulated_gradient); + } + + // Outputs the number of weights + LOGDEBUG << "Weights: " << w; + weight_count_ = w; + + first_training_layer_ = dynamic_cast<TrainingLayer*>(graph_.GetTrainingNodes()[0]->layer); + sample_count_ = first_training_layer_->GetLabelWidth() * first_training_layer_->GetLabelHeight() + * first_training_layer_->GetBatchSize(); - first_training_layer_ = dynamic_cast<TrainingLayer*>(graph_.GetTrainingNodes()[0]->layer); - sample_count_ = first_training_layer_->GetLabelWidth() * first_training_layer_->GetLabelHeight() - * first_training_layer_->GetBatchSize(); + InitializeStats(); } -void Trainer::Train (unsigned int epochs) { - // net_.SetTestOnlyStatDisabled (false); - graph_.SetIsTesting(false); +void Trainer::Train (unsigned int epochs, bool do_snapshots) { + // Update hardcoded stats + System::stat_aggregator->hardcoded_stats_.weights = weight_count_; - for (unsigned int e = 0; e < epochs; e++) + graph_.SetIsTesting(false); + graph_.SetStatLayersEnabled(settings_.stats_during_training); + + for (unsigned int e = 0; e < epochs; e++) { Epoch(); + if(do_snapshots) { + System::stat_aggregator->Snapshot(); + // Update hardcoded stats + System::stat_aggregator->hardcoded_stats_.weights = weight_count_; + } + } - // net_.SetTestOnlyStatDisabled (false); + graph_.SetStatLayersEnabled(true); } void Trainer::Test() { + // Update hardcoded stats + System::stat_aggregator->hardcoded_stats_.weights = weight_count_; + datum aggregate_loss = 0.0; datum* loss_sums = new datum[graph_.GetLossNodes().size()]; for (unsigned int n = 0; n < graph_.GetLossNodes().size(); n++) @@ -90,9 +264,8 @@ void Trainer::Test() { LOGDEBUG << "Testing, iterations: " << iterations << ", batch size: " << first_training_layer_->GetBatchSize(); - auto t_begin = std::chrono::system_clock::now(); - for (unsigned int i = 0; i < iterations; i++) { + aggregate_loss = 0.0; graph_.FeedForward(); for (unsigned int n = 0; n < graph_.GetLossNodes().size(); n++) { LossFunctionLayer* lossfunction_layer = dynamic_cast<LossFunctionLayer*>(graph_.GetLossNodes()[n]->layer); @@ -100,41 +273,43 @@ void Trainer::Test() { loss_sums[n] += loss; aggregate_loss += loss; } - } + // Batch/Iteration done + if (System::stat_aggregator->state_ == StatAggregator::RECORDING) + System::stat_aggregator->hardcoded_stats_.iterations++; - auto t_end = std::chrono::system_clock::now(); - std::chrono::duration<double> t_diff = t_end - t_begin; - LOGDEBUG << "Testing, sps: " << - (datum) (sample_count_ * iterations) - / (datum) t_diff.count(); + // Update aggregate loss stat + System::stat_aggregator->Update(stat_aggloss_->stat_id, aggregate_loss + / sample_count_ ); - LOGDEBUG << "Testing, tps: " << 1000000.0f * (datum) t_diff.count() / - (datum) (sample_count_ * iterations) << " us"; + } + + // Submit performance statistics + System::stat_aggregator->Update(stat_sps_->stat_id, (double)sample_count_ * (double)iterations); + System::stat_aggregator->Update(stat_fps_->stat_id, (double)(first_training_layer_->GetBatchSize()) * (double)iterations); for (unsigned int n = 0; n < graph_.GetLossNodes().size(); n++) { LossFunctionLayer* lossfunction_layer = dynamic_cast<LossFunctionLayer*>(graph_.GetLossNodes()[n]->layer); LOGINFO << "Testing (Epoch " << epoch_ << ", node " << n << ") " << graph_.GetLossNodes()[n]->layer->GetLayerDescription() << " lps: " << loss_sums[n] / (datum)(iterations * sample_count_); } - LOGINFO << "Testing (Epoch " << epoch_ << ") aggregate lps: " << aggregate_loss / (datum)(iterations * sample_count_); for (unsigned int n = 0; n < graph_.GetStatNodes().size(); n++) { StatLayer* stat_layer = dynamic_cast<StatLayer*>(graph_.GetStatNodes()[n]->layer); std::stringstream epochname; epochname << "Testing - Epoch " << epoch_ << " -"; + stat_layer->UpdateAll(); stat_layer->Print (epochname.str(), false); - stat_layer->Reset(); } for (NetGraphNode* training_node : graph_.GetTrainingNodes()) (dynamic_cast<TrainingLayer*>(training_node->layer))->SetTestingMode(false); - graph_.SetIsTesting(false); - delete[] loss_sums; }
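The manual sps/tps timing removed above is superseded by the sps/fps stats: during recording, Update() accumulates raw sample and frame counts, and the output_function divides the total by the wall time the aggregator measured between StartRecording() and the snapshot. A toy illustration with invented numbers:

#include <cstdio>

int main() {
  // Accumulated by Update() over a test run (hypothetical values)
  double stat_value = 0.0;
  stat_value += 512.0 * 100.0;    // sample_count_ * iterations
  // Applied by the output_function at snapshot time
  double seconds_elapsed = 12.5;  // hc_stats.seconds_elapsed (hypothetical)
  printf("Pixel Throughput: %.1f pixels/s\n", stat_value / seconds_elapsed);  // 4096.0
  return 0;
}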
void Trainer::Epoch() { + // Update hardcoded epoch stat + System::stat_aggregator->hardcoded_stats_.epoch = epoch_; + datum aggregate_loss = 0.0; datum* loss_sums = new datum[graph_.GetLossNodes().size()]; for (unsigned int n = 0; n < graph_.GetLossNodes().size(); n++) @@ -157,8 +332,6 @@ void Trainer::Epoch() { ", bsize: " << first_training_layer_->GetBatchSize() * settings_.sbatchsize << ", current lr: " << CalculateLR (epoch_ * iterations) << std::endl; - auto t_begin = std::chrono::system_clock::now(); - for (unsigned int i = 0; i < iterations; i++) { if ( (50 * i / iterations) > fiftieth) { fiftieth = 50 * i / iterations; @@ -169,6 +342,7 @@ tenth = 10 * i / iterations; std::cout << tenth << "0%" << std::flush; } + aggregate_loss = 0.0; // Reset gradients for (unsigned int np = 0; np < accumulated_gradients_.size(); np++) @@ -207,48 +381,41 @@ } } } - // Calculate annealed learning rate const datum lr = CalculateLR (epoch_ * iterations + i); // Apply gradients with new learning rate ApplyGradients (lr); - } - auto t_end = std::chrono::system_clock::now(); - std::chrono::duration<double> t_diff = t_end - t_begin; - LOGDEBUG << "Training, sps: " << - (datum) (sample_count_ * settings_.sbatchsize - * first_training_layer_->GetLossSamplingProbability() * iterations) - / (datum) t_diff.count(); - - LOGDEBUG << "Training, tps: " << - 1000000.0f * (datum) t_diff.count() / - (datum) (sample_count_ * settings_.sbatchsize - * first_training_layer_->GetLossSamplingProbability() * iterations) << " us"; - -#ifdef BUILD_OPENCL - LOGDEBUG << "Training, GB/s up: " << ((datum)CLHelper::bytes_up)/(1073741824.0 * (datum)t_diff.count()); - LOGDEBUG << "Training, GB/s down: " << ((datum)CLHelper::bytes_down)/(1073741824.0 * (datum)t_diff.count()); - CLHelper::bytes_up = 0; - CLHelper::bytes_down = 0; -#endif + // Batch/Iteration done + if (System::stat_aggregator->state_ == StatAggregator::RECORDING) + System::stat_aggregator->hardcoded_stats_.iterations++; + // Update aggregate loss stat + System::stat_aggregator->Update(stat_aggloss_->stat_id, aggregate_loss + / (first_training_layer_->GetLossSamplingProbability() * sample_count_ * settings_.sbatchsize)); + } + + // Submit
performance statistics + System::stat_aggregator->Update(stat_sps_->stat_id, (double)sample_count_ * (double)iterations * (double)(settings_.sbatchsize)); + System::stat_aggregator->Update(stat_fps_->stat_id, (double)(first_training_layer_->GetBatchSize()) * (double)iterations * (double)(settings_.sbatchsize)); + // Display training epoch_error for (unsigned int n = 0; n < graph_.GetLossNodes().size(); n++) { LossFunctionLayer* lossfunction_layer = dynamic_cast<LossFunctionLayer*>(graph_.GetLossNodes()[n]->layer); LOGINFO << "Training (Epoch " << epoch_ << ", node " << n << ") " << graph_.GetLossNodes()[n]->layer->GetLayerDescription() << " lps: " << loss_sums[n] / (datum)(iterations * sample_count_ * settings_.sbatchsize * first_training_layer_->GetLossSamplingProbability()); } - LOGINFO << "Training (Epoch " << epoch_ << ") aggregate lps: " << aggregate_loss / (datum)(iterations * sample_count_ * settings_.sbatchsize * first_training_layer_->GetLossSamplingProbability()); - for (unsigned int n = 0; n < graph_.GetStatNodes().size(); n++) { - StatLayer* stat_layer = dynamic_cast<StatLayer*>(graph_.GetStatNodes()[n]->layer); - std::stringstream epochname; - epochname << "Training - Epoch " << epoch_ << " -"; - stat_layer->Print (epochname.str(), true); - stat_layer->Reset(); - } + if(settings_.stats_during_training) { + for (unsigned int n = 0; n < graph_.GetStatNodes().size(); n++) { + StatLayer* stat_layer = dynamic_cast<StatLayer*>(graph_.GetStatNodes()[n]->layer); + std::stringstream epochname; + epochname << "Training - Epoch " << epoch_ << " -"; + stat_layer->UpdateAll(); + stat_layer->Print (epochname.str(), true); + } + } delete[] loss_sums; epoch_++; @@ -256,7 +423,8 @@ void Trainer::ApplyGradients (datum lr) { unsigned int dp = 0; - + unsigned int qp_caseA = 0, qp_caseB = 0, qp_caseC = 0, qp_caseM = 0; + for (unsigned int l = 0; l < graph_.GetNodes().size(); l++) { Layer* const layer = graph_.GetNodes()[l]->layer; datum layer_lr; @@ -290,7 +458,7 @@ void Trainer::ApplyGradients (datum lr) { datum delta = // Average of gradient over minibatch - layer_lr * (w_gradient / (datum) (sample_count_ * settings_.sbatchsize)) + + layer_lr * (w_gradient / ((datum) (sample_count_ * settings_.sbatchsize)) * first_training_layer_->GetLossSamplingProbability()) + // Regularization layer_lr * (settings_.l2_weight * l2_gradient + settings_.l1_weight * l1_gradient); @@ -313,29 +481,36 @@ const datum s = settings_.mu / (1.0 + settings_.mu); datum step = 0; - if(last_step > 0.001) { + if(last_step > 0.00001) { if(delta > 0.0) { step += lr * settings_.eta * delta; + qp_caseB++; } if(delta > (s * last_gradient)) { step += settings_.mu * last_step; + qp_caseM++; } else { step += last_step * delta / (last_gradient - delta); } + qp_caseA++; - } else if(last_step < -0.001) { + } else if(last_step < -0.00001) { if(delta < 0.0) { step += lr * settings_.eta * delta; + qp_caseB++; } if(delta < (s * last_gradient)) { step += settings_.mu * last_step; + qp_caseM++; } else { step += last_step * delta / (last_gradient - delta); } + qp_caseA++; } else { step += lr * settings_.eta * delta; + qp_caseC++; } if(step > 1000 || step < -1000) { @@ -358,6 +533,14 @@ dp++; } } + + // Update quickprop stats + if(settings_.optimization_method == QUICKPROP) { + System::stat_aggregator->Update(stat_qp_caseA_->stat_id, (double)qp_caseA); + System::stat_aggregator->Update(stat_qp_caseB_->stat_id, (double)qp_caseB); + System::stat_aggregator->Update(stat_qp_caseC_->stat_id, (double)qp_caseC); + System::stat_aggregator->Update(stat_qp_caseM_->stat_id, (double)qp_caseM); + } } std::ostream& operator<< (std::ostream & output,
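The counters added above classify, per weight and iteration, which QuickProp branch fired: case B (gradient-descent term added), case M (maximum growth factor mu applied), case A (any nonzero previous step), case C (dead previous step, plain descent). A self-contained sketch of the positive branch; the gradient values are invented and eta/mu here are placeholders, not CN24 defaults, and the mirrored negative branch is omitted:

#include <cstdio>

int main() {
  double lr = 0.01, eta = 0.5, mu = 1.75;  // hypothetical settings
  double last_step = 0.02, last_gradient = -0.4, delta = -0.1;
  const double s = mu / (1.0 + mu);
  double step = 0;
  int qp_caseA = 0, qp_caseB = 0, qp_caseC = 0, qp_caseM = 0;
  if (last_step > 0.00001) {
    if (delta > 0.0) { step += lr * eta * delta; qp_caseB++; }
    if (delta > (s * last_gradient)) { step += mu * last_step; qp_caseM++; }
    else step += last_step * delta / (last_gradient - delta);
    qp_caseA++;
  } else {
    step += lr * eta * delta; qp_caseC++;  // dead previous step: plain descent
  }
  printf("step=%.4f A=%d B=%d C=%d M=%d\n", step, qp_caseA, qp_caseB, qp_caseC, qp_caseM);
  return 0;
}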
diff --git a/src/net/UpscaleLayer.cpp b/src/net/UpscaleLayer.cpp index d0f1cf9..04d59a1 100644 --- a/src/net/UpscaleLayer.cpp +++ b/src/net/UpscaleLayer.cpp @@ -8,6 +8,7 @@ #include "Log.h" #include "Init.h" +#include "TensorMath.h" #include "UpscaleLayer.h" @@ -70,48 +71,11 @@ bool UpscaleLayer::Connect ( const CombinedTensor* input, } void UpscaleLayer::FeedForward() { - #pragma omp parallel for default(shared) - - for ( std::size_t sample = 0; sample < input_->data.samples(); sample++ ) { - for ( unsigned int map = 0; map < maps_; map++ ) { - for ( unsigned int ox = 0; ox < output_width_; ox++ ) { - for ( unsigned int oy = 0; oy < output_height_; oy++ ) { - const unsigned int ix = ox / region_width_; - const unsigned int iy = oy / region_height_; - const datum ival = *input_->data.data_ptr_const ( ix, iy, map, sample ); - // Feed forward - *output_->data.data_ptr ( ox, oy, map, sample ) = ival; - } - } - } - } + TensorMath::UP(input_->data, output_->data, region_width_, region_height_, 1.0f); } void UpscaleLayer::BackPropagate() { - #pragma omp parallel for default(shared) - - for ( std::size_t sample = 0; sample < input_->data.samples(); sample++ ) { - for ( unsigned int map = 0; map < maps_; map++ ) { - for ( unsigned int ix = 0; ix < input_width_; ix++ ) { - for ( unsigned int iy = 0; iy < input_height_; iy++ ) { - const unsigned int ox = ix * region_width_; - const unsigned int oy = iy * region_height_; - datum sum = 0; - - for ( unsigned int ry = 0; ry < region_height_; ry++ ) { - for ( unsigned int rx = 0; rx < region_width_; rx++ ) { - sum += *output_->delta.data_ptr_const ( ox + rx, oy +ry, map, sample ); - } - } - - *input_->delta.data_ptr ( ix,iy,map,sample ) = sum; // (datum)(region_width_ * region_height_); - } - } - } - } - - return; - + TensorMath::DOWN(output_->delta, input_->delta, region_width_, region_height_, 1.0f); } } diff --git a/src/util/CSVStatSink.cpp b/src/util/CSVStatSink.cpp new file mode 100644 index 0000000..4f7ac90 --- /dev/null +++ b/src/util/CSVStatSink.cpp @@ -0,0 +1,94 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#include <string> +#include <vector> + +#include <fstream> +#include <sstream> + +#include <iomanip> +#include <limits> +#include <algorithm> +#include <cctype> + +#include "CSVStatSink.h" + +namespace Conv { + +void CSVStatSink::Initialize(std::vector<StatDescriptor*>& stat_descriptors) { + stat_descriptors_ = stat_descriptors; +} + +void CSVStatSink::Process(Conv::HardcodedStats &hardcoded_stats, std::vector<Stat*> &stats) { + if(csv_stream_ == nullptr) + return; + + // Write hardcoded stats + (*csv_stream_) << (hardcoded_stats.is_training ? "1" : "0") << ","; + (*csv_stream_) << hardcoded_stats.epoch << ","; + (*csv_stream_) << hardcoded_stats.iterations << ","; + (*csv_stream_) << std::setprecision(std::numeric_limits<double>::digits10 + 1) << hardcoded_stats.seconds_elapsed << ","; + + // Write values...
for (unsigned int s = 0; s < stat_descriptors_.size(); s++) { + // ...but only if not NULL + if(!stats[s]->is_null) + (*csv_stream_) << std::setprecision(std::numeric_limits<double>::digits10 + 1) << stats[s]->value; + + // Add comma except for last column + if(s < (stat_descriptors_.size() - 1)) + (*csv_stream_) << ","; + } + (*csv_stream_) << "\n"; + (*csv_stream_) << std::flush; + +} + +bool isnalnum(char c) { + return !std::isalnum((int)c); +} + +void CSVStatSink::SetCurrentExperiment(std::string current_experiment) { + // Close stream if already open + if(csv_stream_ != nullptr) { + csv_stream_->close(); + delete csv_stream_; + } + + // Generate filename + std::stringstream csv_filename_ss; + csv_filename_ss << "csv/" << current_experiment << ".csv"; + std::string csv_filename=csv_filename_ss.str(); + + // Open new stream + csv_stream_ = new std::ofstream(csv_filename, std::ios::out); + + // Test if stream works + if(!csv_stream_->good()) { + LOGERROR << "Cannot open " << csv_filename << " for writing!"; + delete csv_stream_; + csv_stream_ = nullptr; + return; + } + // Write header for hardcoded stats + (*csv_stream_) << "IsTraining,Epoch,Iterations,SecondsElapsed,"; + + // Write header for non-hardcoded stats + for (unsigned int s = 0; s < stat_descriptors_.size(); s++) { + std::string description = stat_descriptors_[s]->description; + + // Strip non-alphanumeric characters + description.erase(std::remove_if(description.begin(), description.end(), isnalnum), description.end()); + (*csv_stream_) << description; + if(s < (stat_descriptors_.size() - 1)) + (*csv_stream_) << ","; + } + (*csv_stream_) << "\n"; + (*csv_stream_) << std::flush; +} + +} \ No newline at end of file
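For illustration, if only the three ConfusionMatrixLayer stats were registered, csv/<experiment>.csv would look roughly like this (values invented; the headers are the stat descriptions with non-alphanumeric characters stripped, so "Average Intersection over Union" becomes "AverageIntersectionoverUnion", and a null stat leaves its field empty):

IsTraining,Epoch,Iterations,SecondsElapsed,OverallRecognitionRate,AverageRecognitionRate,AverageIntersectionoverUnion
0,3,120,84.5,91.2,87.6,73.4
1,4,0,84.5,,,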
diff --git a/src/util/CompressedTensor.cpp b/src/util/CompressedTensor.cpp new file mode 100644 index 0000000..b8a76a5 --- /dev/null +++ b/src/util/CompressedTensor.cpp @@ -0,0 +1,349 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <cmath> +#include <cstdlib> +#include <cstdio> +#include <fstream> +#include <iostream> + + +#ifdef BUILD_POSIX +#include <sys/mman.h> +#include <unistd.h> +#include <cerrno> +#endif + +#include "PNGUtil.h" +#include "JPGUtil.h" + +#ifdef BLAS_MKL +#include <mkl.h> +#endif + +#include "Config.h" +#include "Log.h" +#include "CompressedTensor.h" +#include "CLHelper.h" + +namespace Conv { + +const unsigned int chars_per_datum = sizeof(Conv::datum)/sizeof(char); + +CompressedTensor::CompressedTensor() { + +} + +CompressedTensor::~CompressedTensor() { + DeleteIfPossible(); +} + +void CompressedTensor::Compress(Tensor& tensor) +{ + std::size_t compressed_length = 0; + std::size_t uncompressed_elements = tensor.elements(); +#ifdef BUILD_OPENCL + tensor.MoveToCPU(); +#endif + + void* compressed_buffer = new char[2 * tensor.elements() * chars_per_datum + 2]; + CompressedTensor::CompressData((void*)tensor.data_ptr(), uncompressed_elements, compressed_buffer, compressed_length); + + Resize(tensor.samples(), tensor.width(), tensor.height(), tensor.maps(), compressed_length, (char*)compressed_buffer, false); +} + +void CompressedTensor::Decompress(Tensor& tensor, datum* preallocated_buffer) +{ + std::size_t compressed_length = compressed_length_; + std::size_t uncompressed_elements = 0; + datum* uncompressed_buffer = preallocated_buffer; + if(uncompressed_buffer == nullptr) + uncompressed_buffer = new datum[elements_]; + + CompressedTensor::DecompressData(uncompressed_buffer, uncompressed_elements, compressed_data_ptr_, compressed_length); + + if(uncompressed_elements != elements_) { + FATAL("Decompressed size mismatch!"); + } + + tensor.Resize(samples_, width_, height_, maps_, uncompressed_buffer, false); +} + + +void CompressedTensor::Resize ( const std::size_t samples, const std::size_t width, + const std::size_t height, const std::size_t maps, const std::size_t compressed_length, char* const preallocated_memory, bool mmapped) { + // Delete the old allocation + DeleteIfPossible(); + + // Don't need to allocate zero memory + if ( compressed_length == 0 ) + return; + + if(preallocated_memory != nullptr) { + compressed_data_ptr_ = preallocated_memory; + mmapped_ = mmapped; + } else { + // Allocate + compressed_data_ptr_ = new char[compressed_length]; + } + + // Save configuration + samples_ = samples; + width_ = width; + height_ = height; + maps_ = maps; + elements_ = samples * width * height * maps; + compressed_length_ = compressed_length; +} + +void CompressedTensor::Serialize ( std::ostream& output ) { + uint64_t samples = samples_; + uint64_t width = width_; + uint64_t height = height_; + uint64_t maps = maps_; + uint64_t compressed_length = compressed_length_; + + output.write ( ( const char* ) &samples, sizeof ( uint64_t ) / sizeof ( char ) ); + output.write ( ( const char* ) &width, sizeof ( uint64_t ) / sizeof ( char ) ); + output.write ( ( const char* ) &height, sizeof ( uint64_t ) / sizeof ( char ) ); + output.write ( ( const char* ) &maps, sizeof ( uint64_t ) / sizeof ( char ) ); + output.write ( ( const char* ) &compressed_length, sizeof ( uint64_t ) / sizeof ( char ) ); + + if ( elements_ > 0 ) + output.write ( ( const char* ) compressed_data_ptr_, compressed_length_); +} + +void CompressedTensor::Deserialize ( std::istream& input , bool head_only, bool try_mmap, int fd) { + uint64_t samples = 0; + uint64_t width = 0; + uint64_t height = 0; + uint64_t maps = 0; + uint64_t compressed_length = 0; + + if ( !input.good() ) + LOGERROR << "Cannot deserialize from this stream!"; + + input.read ( ( char* ) &samples, sizeof ( uint64_t ) /
sizeof ( char ) ); + input.read ( ( char* ) &width, sizeof ( uint64_t ) / sizeof ( char ) ); + input.read ( ( char* ) &height, sizeof ( uint64_t ) / sizeof ( char ) ); + input.read ( ( char* ) &maps, sizeof ( uint64_t ) / sizeof ( char ) ); + input.read ( ( char* ) &compressed_length, sizeof ( uint64_t ) / sizeof ( char ) ); + +#ifdef BUILD_POSIX + if(!try_mmap || fd == 0) +#endif + Resize ( samples, width, height, maps, compressed_length ); + + if ( compressed_length > 0 && !head_only ) { +#ifdef BUILD_POSIX + if(try_mmap && fd != 0) { + // Get page size + long int page_size = sysconf(_SC_PAGESIZE); + long int current_position = input.tellg(); + long int offset_in_page = current_position % page_size; +#ifdef BUILD_LINUX + void* target_mmap = mmap64(NULL, compressed_length + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page); +#elif defined(BUILD_OSX) + // OS X is 64-bit by default + void* target_mmap = mmap(NULL, compressed_length + offset_in_page, PROT_READ, MAP_PRIVATE, fd, current_position - offset_in_page); +#endif + if(target_mmap == MAP_FAILED) { + LOGERROR << "Memory map failed: " << errno; + } + original_mmap_ = target_mmap; + + target_mmap = (void*)(((long)target_mmap) + offset_in_page); + Resize(samples, width, height, maps, compressed_length, (char*)target_mmap, true); + input.seekg(compressed_length, std::ios::cur); + } else +#endif + input.read ( ( char* ) compressed_data_ptr_, compressed_length); + } + else if(head_only) + input.seekg(compressed_length, std::ios::cur); +} + +void CompressedTensor::DeleteIfPossible() { + if ( compressed_data_ptr_ != nullptr ) { +#ifdef BUILD_POSIX + if(mmapped_) { + munmap((void*)original_mmap_, compressed_length_); + original_mmap_ = nullptr; + mmapped_ = false; + } else { +#endif + delete[] compressed_data_ptr_; +#ifdef BUILD_POSIX + } +#endif + + compressed_data_ptr_ = nullptr; + } + + samples_ = 0; + width_ = 0; + height_ = 0; + maps_ = 0; + elements_ = samples_ * width_ * height_ * maps_; + compressed_length_ = 0; +} + +void CompressedTensor::PrintStats() { + +} + +std::ostream& operator<< ( std::ostream& output, const CompressedTensor& tensor ) { + return output << "C(" << tensor.samples() << "s@" << tensor.width() << + "x" << tensor.height() << "x" << tensor.maps() << "m)"; +} + +/* + * This is the compression part. Don't change this or you will break the file format. 
+ */ +const unsigned char rl_marker = 'X'; +const unsigned char rl_doublemarker = 'X'; +const unsigned char rl_rle = 'Y'; +const unsigned int rl_bytes = 1; +const unsigned int rl_max = (unsigned int)((1L << (8L * (unsigned long)rl_bytes)) - 3L); +const unsigned int rl_min = 1 + (5 + rl_bytes) / chars_per_datum; + +void CompressedTensor::CompressData(void* uncompressed, const std::size_t& uncompressed_elements, void* compressed, std::size_t& compressed_length) +{ + std::size_t bytes_out = 0; + + Conv::datum last_symbol = 0; + std::size_t running_length = 0; + + unsigned char* output_ptr = (unsigned char*)compressed; + + const datum* data_ptr_const = (const datum*) uncompressed; + + for(std::size_t pos = 0; pos <= uncompressed_elements; pos++) { + Conv::datum current_symbol = 0; + if(pos < uncompressed_elements) { + current_symbol = data_ptr_const[pos]; + if(current_symbol == last_symbol) { + // Increase running length + running_length++; + } + } else { + // Force emission of last symbol + } + + + if( + // EOF reached + (pos == uncompressed_elements) || + // Different symbol + (current_symbol != last_symbol) || + // Maximum run length reached + (running_length == rl_max)) { + + // Emit... + if(running_length > 0 && running_length < rl_min) { + // Emit single symbol(s) + for(std::size_t r = 0; r < running_length; r++) { + for(std::size_t b = 0; b < chars_per_datum; b++) { + char char_to_emit = ((char*)&last_symbol)[b]; + if(char_to_emit == rl_marker) { + // Emit escaped + *output_ptr = rl_marker; + output_ptr++; bytes_out++; + *output_ptr = rl_doublemarker; + output_ptr++; bytes_out++; + } else { + // Emit directly + *output_ptr = char_to_emit; + output_ptr++; bytes_out++; + } + } + } + } else if(running_length >= rl_min) { + // Emit encoded + *output_ptr = rl_marker; + output_ptr++; bytes_out++; + *output_ptr = rl_rle; + output_ptr++; bytes_out++; + + // Running length output + for(std::size_t b = 0; b < rl_bytes; b++) { + *output_ptr = (running_length >> ((rl_bytes - (b+1)) * 8)) & 0xFF; + output_ptr++; bytes_out++; + } + + for(std::size_t b = 0; b < chars_per_datum; b++) { + unsigned char char_to_emit = ((char*)&last_symbol)[b]; + *output_ptr = char_to_emit; + output_ptr++; bytes_out++; + } + } + + // ...and reset + if(running_length == rl_max) + running_length = 0; + else + running_length = 1; + } + + last_symbol = current_symbol; + } + compressed_length = bytes_out; +} + +void CompressedTensor::DecompressData(void* uncompressed, std::size_t& uncompressed_elements, void* compressed, const std::size_t& compressed_length) +{ + unsigned int bytes_out = 0; + unsigned char* output_ptr = (unsigned char*)uncompressed; + const unsigned char* input_ptr = (const unsigned char*)compressed; + + for(unsigned int pos = 0; pos < compressed_length; pos++) { + unsigned char current_symbol = input_ptr[pos]; + if(current_symbol == rl_marker) { + pos++; current_symbol = input_ptr[pos]; + if(current_symbol == rl_doublemarker) { + // Emit single marker + *output_ptr = rl_marker; + output_ptr++; bytes_out++; + } else if(current_symbol == rl_rle) { + unsigned int running_length = 0; + + // Running length input + for(unsigned int b = 0; b < rl_bytes; b++) { + pos++; current_symbol = input_ptr[pos]; + running_length += current_symbol; + if((b+1) != rl_bytes) + running_length <<= 8; + } + + for(unsigned int r = 0; r < running_length; r++) { + for(unsigned int b = 0; b < chars_per_datum; b++) { + pos++; current_symbol = input_ptr[pos]; + *output_ptr = current_symbol; + output_ptr++; bytes_out++; + } + pos -= chars_per_datum; + } + pos += chars_per_datum; + } else { + FATAL("Incorrect encoding!"); + } + } else { + // Emit directly + *output_ptr = current_symbol; + output_ptr++; bytes_out++; + } + } + if(bytes_out % chars_per_datum != 0) { + FATAL("Compressed length wrong!"); + } + uncompressed_elements = bytes_out / chars_per_datum; +} + + +}
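A byte-level illustration of the on-disk format defined above, assuming a 4-byte datum (so chars_per_datum == 4, rl_min == 2, rl_max == 253): a run of 200 copies of 0.0f, 800 raw bytes, becomes 7 bytes, while a literal 'X' byte is escaped by doubling. The sketch below only assembles the encoded run by hand:

#include <cstdio>

int main() {
  float value = 0.0f;
  unsigned int running_length = 200;                  // <= rl_max (253)
  unsigned char out[7];
  out[0] = 'X';                                       // rl_marker
  out[1] = 'Y';                                       // rl_rle
  out[2] = (unsigned char)(running_length & 0xFF);    // rl_bytes == 1 length byte
  for (int b = 0; b < 4; b++)
    out[3 + b] = ((unsigned char*)&value)[b];         // the repeated datum
  for (int i = 0; i < 7; i++) printf("%02X ", out[i]);
  printf("\n");                                       // 58 59 C8 00 00 00 00
  return 0;
}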
diff --git a/src/util/CompressedTensorStream.cpp b/src/util/CompressedTensorStream.cpp new file mode 100644 index 0000000..8f1d7f2 --- /dev/null +++ b/src/util/CompressedTensorStream.cpp @@ -0,0 +1,97 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project. + */ + +#include <fstream> +#include <iostream> + +#ifdef BUILD_POSIX +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#endif + +#include "CompressedTensorStream.h" + +namespace Conv { + +unsigned int CompressedTensorStream::LoadFile(std::string path) +{ + std::ifstream input_stream(path, std::ios::binary | std::ios::in); + if(!input_stream.good()) { + FATAL("Cannot open file: " << path); + } +#ifdef BUILD_POSIX + int input_fd = open(path.c_str(), O_RDONLY); + if(input_fd < 0) { + FATAL("Cannot open file: " << path); + } +#endif + + uint64_t magic = 0; + input_stream.read((char*)&magic, sizeof(uint64_t)/sizeof(char)); + + if(magic != CN24_CTS_MAGIC) { + FATAL("Wrong magic at start of stream!"); + } + + // Go through file + std::cout << std::endl << std::flush; + + while (!input_stream.eof()) { + CompressedTensor* tensor = new CompressedTensor(); +#ifdef BUILD_POSIX + tensor->Deserialize (input_stream, false, true, input_fd); +#else + tensor->Deserialize (input_stream, false); +#endif + + if (tensor->elements() == 0) + break; + + if(tensor->elements() > max_elements_) + max_elements_ = tensor->elements(); + + tensors_.push_back(tensor); + std::cout << "." << std::flush; + input_stream.peek(); + } + + temp_tensor_.Resize(1,max_elements_); + return 0; +} + +bool CompressedTensorStream::CopySample(const unsigned int source, const std::size_t source_sample, + Conv::Tensor& target, const std::size_t target_sample) +{ + if(source < tensors_.size()) { + CompressedTensor* const ctensor = tensors_[source]; + if(source_sample == 0 && ctensor->width() == target.width() && ctensor->height() == target.height() && ctensor->maps() == target.maps() && ctensor->samples() == 1) { + // This is a little hack for faster loading of certain datasets +#ifdef BUILD_OPENCL + target.MoveToCPU(); +#endif + datum* old_data_ptr = temp_tensor_.data_ptr(); + datum* direct_ptr = target.data_ptr(0, 0, 0, target_sample); + temp_tensor_.Resize(1, max_elements_, 1, 1, direct_ptr, false, true); + ctensor->Decompress(temp_tensor_, temp_tensor_.data_ptr()); + + temp_tensor_.Resize(1, max_elements_, 1, 1, old_data_ptr, false, true); + return true; + } else { + ctensor->Decompress(temp_tensor_, temp_tensor_.data_ptr()); + return Tensor::CopySample(temp_tensor_, source_sample, target, target_sample); + } + } else + return false; +} + +} diff --git a/src/util/FloatTensorStream.cpp b/src/util/FloatTensorStream.cpp new file mode 100644 index 0000000..443fdcf --- /dev/null +++ b/src/util/FloatTensorStream.cpp @@ -0,0 +1,65 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project.
+ */ + +#include <fstream> +#include <iostream> + +#ifdef BUILD_POSIX +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#endif + +#include "FloatTensorStream.h" + +namespace Conv { + +unsigned int FloatTensorStream::LoadFile(std::string path) +{ + std::ifstream input_stream(path, std::ios::binary | std::ios::in); + if(!input_stream.good()) { + FATAL("Cannot open file: " << path); + } +#ifdef BUILD_POSIX + int input_fd = open(path.c_str(), O_RDONLY); + if(input_fd < 0) { + FATAL("Cannot open file: " << path); + } +#endif + + // Go through file + std::cout << std::endl << std::flush; + + while (!input_stream.eof()) { + Tensor* tensor = new Tensor(); +#ifdef BUILD_POSIX + tensor->Deserialize (input_stream, false, true, input_fd); +#else + tensor->Deserialize (input_stream, false); +#endif + + if (tensor->elements() == 0) + break; + + tensors_.push_back(tensor); + std::cout << "." << std::flush; + input_stream.peek(); + } + return 0; +} + +bool FloatTensorStream::CopySample(const unsigned int source, const std::size_t source_sample, + Conv::Tensor& target, const std::size_t target_sample) +{ + if(source < tensors_.size()) { + return Tensor::CopySample(*tensors_[source], source_sample, target, target_sample); + } else + return false; +} + +} diff --git a/src/util/GradientTester.cpp b/src/util/GradientTester.cpp index 8cbf5df..df576d2 100644 --- a/src/util/GradientTester.cpp +++ b/src/util/GradientTester.cpp @@ -11,7 +11,7 @@ namespace Conv { -void GradientTester::TestGradient ( NetGraph& graph ) { +void GradientTester::TestGradient ( NetGraph& graph, unsigned int skip_weights, bool fatal_fail ) { const double epsilon = 0.005; LOGDEBUG << "Testing gradient. FeedForward..."; graph.FeedForward(); @@ -37,8 +37,10 @@ void GradientTester::TestGradient ( NetGraph& graph ) { unsigned int okay = 0; unsigned int tolerable = 0; unsigned int failed = 0; - for(unsigned int e = 0; e < param->data.elements(); e++) + unsigned int total = 0; + for(unsigned int e = 0; e < param->data.elements(); e+=(skip_weights + 1)) { + total++; #ifdef BUILD_OPENCL param->data.MoveToCPU(); param->delta.MoveToCPU(); @@ -83,23 +85,27 @@ graph.FeedForward(); } // std::cout << "\n"; if(passed) { - LOGINFO << "Okay!"; + LOGDEBUG << "Okay!"; } else { LOGERROR << "Failed!"; } - LOGINFO << okay << " of " << param->data.elements() << " gradients okay (delta < 2%)"; - LOGINFO << tolerable << " of " << param->data.elements() << " gradients tolerable (delta < 20%)"; - LOGINFO << failed << " of " << param->data.elements() << " gradients failed (delta >= 20%)"; + LOGDEBUG << okay << " of " << total << " gradients okay (delta < 2%)"; + LOGDEBUG << tolerable << " of " << total << " gradients tolerable (delta < 20%)"; + LOGDEBUG << failed << " of " << total << " gradients failed (delta >= 20%)"; global_okay += okay; global_tolerable += tolerable; global_failed += failed; - global_weights += param->data.elements(); + global_weights += total; } } - LOGINFO << global_okay << " of " << global_weights << " gradients okay (delta < 2%)"; - LOGINFO << global_tolerable << " of " << global_weights << " gradients tolerable (delta < 20%)"; - LOGINFO << global_failed << " of " << global_weights << " gradients failed (delta >= 20%)"; + LOGRESULT << global_okay << " of " << global_weights << " tested gradients okay (delta < 2%)" << LOGRESULTEND; + LOGRESULT << global_tolerable << " of " << global_weights << " tested gradients tolerable (delta < 20%)" << LOGRESULTEND; + LOGRESULT << global_failed << " of " << global_weights << " tested gradients failed (delta >= 20%)" << LOGRESULTEND; + + if (global_failed > 0 && fatal_fail) { + FATAL("Failed gradient check!"); + } }
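TestGradient compares each backpropagated derivative against a central difference using the same epsilon as above; on a toy one-weight "loss" (a stand-in function, not a CN24 layer) the check looks like this:

#include <cstdio>
#include <cmath>

static double loss(double w) { return 0.5 * w * w; }  // toy loss, dL/dw = w

int main() {
  const double epsilon = 0.005;
  double w = 1.3;
  double analytic = w;  // what backpropagation would report
  double numeric = (loss(w + epsilon) - loss(w - epsilon)) / (2.0 * epsilon);
  double delta = std::fabs(numeric - analytic) / std::fabs(analytic);
  printf("relative delta: %.4f%% (okay < 2%%, tolerable < 20%%)\n", 100.0 * delta);
  return 0;
}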
diff --git a/src/util/Init.cpp b/src/util/Init.cpp index bffc3ce..c5a460b 100644 --- a/src/util/Init.cpp +++ b/src/util/Init.cpp @@ -34,6 +34,7 @@ #endif #include "TensorViewer.h" +#include "StatAggregator.h" namespace Conv { @@ -66,9 +67,12 @@ cl_kernel CLHelper::k_setValue = 0; cl_kernel CLHelper::k_sms = 0; cl_kernel CLHelper::k_im2col = 0; cl_kernel CLHelper::k_col2im = 0; +cl_kernel CLHelper::k_up = 0; +cl_kernel CLHelper::k_down = 0; #endif TensorViewer* System::viewer = nullptr; +StatAggregator* System::stat_aggregator = nullptr; int System::log_level = 0; #define STRING_SHA1 GIT_SHA1 @@ -83,7 +87,7 @@ void System::Init(int requested_log_level) { } else log_level = requested_log_level; - LOGINFO << "CN24 version " STRING_SHA1; + LOGINFO << "CN24 v2.0.0 at " STRING_SHA1; LOGINFO << "Copyright (C) 2015 Clemens-Alexander Brust"; LOGINFO << "For licensing information, see the LICENSE" << " file included with this project."; @@ -125,7 +129,12 @@ void System::Init(int requested_log_level) { LOGWARN << "Could not initialize GTK!"; } #endif + + // Initialize global TensorViewer viewer = new TensorViewer(); + + // Initialize global StatAggregator + stat_aggregator = new StatAggregator(); } void System::GetExecutablePath(std::string& binary_path) { @@ -257,6 +266,7 @@ void CLHelper::Init(unsigned int platform_number, unsigned int device_number) { cl_program p_maximum = CreateProgram ( "kernels/maximumPooling.cl" ); cl_program p_amaximum = CreateProgram ( "kernels/advmaximumPooling.cl" ); cl_program p_nonLinearFunctions = CreateProgram ( "kernels/nonLinearFunctions.cl" ); + cl_program p_scaling = CreateProgram ( "kernels/scaling.cl" ); cl_program p_setValue = CreateProgram ( "kernels/setValue.cl" ); cl_program p_sms = CreateProgram ( "kernels/sms.cl" ); cl_program p_im2col = CreateProgram ( "kernels/im2col.cl" ); @@ -392,6 +402,18 @@ void CLHelper::Init(unsigned int platform_number, unsigned int device_number) { if ( error != CL_SUCCESS ) { FATAL ( "Error creating kernel: " << ( signed int ) error ); } + + k_up = clCreateKernel ( p_scaling, "UP", &error ); + + if ( error != CL_SUCCESS ) { + FATAL ( "Error creating kernel: " << ( signed int ) error ); + } + + k_down = clCreateKernel ( p_scaling, "DOWN", &error ); + + if ( error != CL_SUCCESS ) { + FATAL ( "Error creating kernel: " << ( signed int ) error ); + } #ifdef BUILD_CLBLAS cl_int err = clblasSetup(); if (err!=CL_SUCCESS) diff --git a/src/util/StatAggregator.cpp b/src/util/StatAggregator.cpp new file mode 100644 index 0000000..e2aed6e --- /dev/null +++ b/src/util/StatAggregator.cpp @@ -0,0 +1,153 @@ +/* + * This file is part of the CN24 semantic segmentation software, + * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com). + * + * For licensing information, see the LICENSE file included with this project.
diff --git a/src/util/StatAggregator.cpp b/src/util/StatAggregator.cpp
new file mode 100644
index 0000000..e2aed6e
--- /dev/null
+++ b/src/util/StatAggregator.cpp
@@ -0,0 +1,153 @@
+/*
+ * This file is part of the CN24 semantic segmentation software,
+ * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com).
+ *
+ * For licensing information, see the LICENSE file included with this project.
+ */
+
+#include "StatAggregator.h"
+#include "StatSink.h"
+
+#include <chrono>
+
+namespace Conv {
+
+unsigned int StatAggregator::RegisterSink(StatSink* stat_sink)
+{
+  stat_sinks_.push_back(stat_sink);
+  return stat_sink_count_++;
+}
+
+unsigned int StatAggregator::RegisterStat(StatDescriptor* stat_descriptor)
+{
+  stat_descriptors_.push_back(stat_descriptor);
+  stat_descriptor->stat_id = stat_descriptor_count_;
+  return stat_descriptor_count_++;
+}
+
+void StatAggregator::Initialize()
+{
+  if(state_ != INIT)
+    return;
+
+  // Initialize all StatSinks
+  for(unsigned int s = 0; s < stat_sink_count_; s++) {
+    stat_sinks_[s]->Initialize(stat_descriptors_);
+  }
+
+  // Initialize all statistics
+  for(unsigned int s = 0; s < stat_descriptor_count_; s++) {
+    Stat stat;
+    stats_.push_back(stat);
+  }
+  state_ = STOPPED;
+
+  // Reset statistics
+  Reset();
+
+  // Send SetCurrentExperiment "message" to all StatSinks at least once before processing
+  SetCurrentExperiment(hardcoded_stats_.current_experiment);
+}
+
+void StatAggregator::Generate()
+{
+  std::vector<Stat*> output_stats;
+
+  for(unsigned int s = 0; s < stat_descriptor_count_; s++) {
+    // We will not check for output_function's validity. We need its output.
+    Stat* output_stat = new Stat;
+    *output_stat = stat_descriptors_[s]->output_function(hardcoded_stats_, stats_[s]);
+    output_stats.push_back(output_stat);
+  }
+
+  // Call all StatSinks' Process method
+  for(unsigned int s = 0; s < stat_sink_count_; s++) {
+    stat_sinks_[s]->Process(hardcoded_stats_, output_stats);
+  }
+
+  // Free all the allocated memory
+  for(unsigned int s = 0; s < stat_descriptor_count_; s++) {
+    delete (output_stats[s]);
+  }
+}
+
+void StatAggregator::Update(unsigned int stat_id, double user_value)
+{
+  // Ignore this call if not recording
+  if(state_ != RECORDING)
+    return;
+
+  if(stat_id < stat_descriptor_count_) {
+    // We will not check for validity because we provided an initial function.
+    stat_descriptors_[stat_id]->update_function(stats_[stat_id], user_value);
+  }
+}
+
+void StatAggregator::Reset()
+{
+  // Ignore this call if recording
+  if(state_ != STOPPED)
+    return;
+
+  hardcoded_stats_.Reset();
+
+  // Reset non-hardcoded stats
+  for(unsigned int s = 0; s < stat_descriptor_count_; s++) {
+    // We will not check for validity because we provided an initial function.
+    stat_descriptors_[s]->init_function(stats_[s]);
+  }
+}
+
+void StatAggregator::StartRecording()
+{
+  // Ignore this call if already recording
+  if(state_ != STOPPED)
+    return;
+
+  // Record start time
+  start_time_ = std::chrono::system_clock::now();
+
+  state_ = RECORDING;
+}
+
+void StatAggregator::StopRecording()
+{
+  // Ignore this call if not recording
+  if(state_ != RECORDING)
+    return;
+
+  // Record stopping time
+  auto stop_time = std::chrono::system_clock::now();
+
+  // Update elapsed time
+  std::chrono::duration<double> t_diff = stop_time - start_time_;
+  hardcoded_stats_.seconds_elapsed += t_diff.count();
+
+  state_ = STOPPED;
+}
+
+void StatAggregator::Snapshot() {
+  // Ignore this call if not recording
+  if (state_ != RECORDING)
+    return;
+
+  StopRecording();
+  Generate();
+  Reset();
+  StartRecording();
+}
+
+void StatAggregator::SetCurrentExperiment(std::string current_experiment) {
+  // Only change the experiment name when not recording and already initialized
+  if(state_ != STOPPED)
+    return;
+
+  // Call all StatSinks' SetCurrentExperiment method
+  for(unsigned int s = 0; s < stat_sink_count_; s++) {
+    stat_sinks_[s]->SetCurrentExperiment(current_experiment);
+  }
+}
+
+}
\ No newline at end of file
diff --git a/src/util/Tensor.cpp b/src/util/Tensor.cpp
index 7203dbb..7e14f60 100644
--- a/src/util/Tensor.cpp
+++ b/src/util/Tensor.cpp
@@ -129,13 +129,14 @@ void Tensor::Shadow ( Tensor& tensor ) {
 
 void Tensor::Resize ( const std::size_t samples, const std::size_t width,
-                      const std::size_t height, const std::size_t maps, datum* const preallocated_memory, bool mmapped) {
+                      const std::size_t height, const std::size_t maps, datum* const preallocated_memory, bool mmapped, bool dont_delete) {
   // Check if reshaping works
-  if ( Reshape ( samples, width, height, maps ) )
+  if (preallocated_memory == nullptr && Reshape ( samples, width, height, maps ) )
     return;
 
-  // Delete the old allocation
-  DeleteIfPossible();
+  // Delete the old allocation if it is different from the new one
+  if(preallocated_memory != data_ptr_ && !dont_delete)
+    DeleteIfPossible();
 
   // Calculate memory requirement
   std::size_t elements = samples * maps * width * height;
@@ -393,17 +394,21 @@ bool Tensor::CopyMap ( const Tensor& source, const std::size_t source_sample,
 
 void Tensor::DeleteIfPossible() {
   if ( data_ptr_ != nullptr ) {
     if ( !is_shadow_ ) {
+#ifdef BUILD_POSIX
       if(mmapped_) {
        munmap((void*)original_mmap_, (elements_ * sizeof(datum)) / sizeof(char));
         original_mmap_ = nullptr;
         mmapped_ = false;
       } else {
+#endif
 #ifdef BLAS_MKL
         mkl_free ( data_ptr_ );
 #else
         delete[] data_ptr_;
 #endif
+#ifdef BUILD_POSIX
       }
+#endif
 #ifdef BUILD_OPENCL
       if ( cl_data_ptr_ != 0 ) {
         clReleaseMemObject ( (cl_mem)cl_data_ptr_ );
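StatAggregator is a small state machine (INIT -> STOPPED -> RECORDING): Update() only counts while recording, Reset() only clears while stopped, and Snapshot() stops, publishes to all sinks via Generate(), resets, and resumes. A registration sketch, assuming StatDescriptor exposes the init_function/update_function/output_function members used above and that Stat carries a double value; the exact field and type names are assumptions, not part of this patch:

    Conv::StatAggregator* agg = Conv::System::stat_aggregator;

    Conv::StatDescriptor desc;
    desc.init_function   = [](Conv::Stat& stat) { stat.value = 0.0; };
    desc.update_function = [](Conv::Stat& stat, double v) { stat.value += v; };
    desc.output_function = [](Conv::HardcodedStats& hc, Conv::Stat& stat) {
      Conv::Stat out = stat;
      // e.g. report a rate by dividing by the recorded wall time
      if (hc.seconds_elapsed > 0.0)
        out.value /= hc.seconds_elapsed;
      return out;
    };

    unsigned int stat_id = agg->RegisterStat(&desc);
    agg->Initialize();          // INIT -> STOPPED, notifies the sinks once
    agg->StartRecording();      // stamps start_time_
    agg->Update(stat_id, 0.5);  // ignored unless state_ == RECORDING
    agg->Snapshot();            // stop, Generate(), Reset(), resume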
diff --git a/src/util/TensorStream.cpp b/src/util/TensorStream.cpp
new file mode 100644
index 0000000..2977d27
--- /dev/null
+++ b/src/util/TensorStream.cpp
@@ -0,0 +1,47 @@
+/*
+ * This file is part of the CN24 semantic segmentation software,
+ * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com).
+ *
+ * For licensing information, see the LICENSE file included with this project.
+ */
+
+#include <fstream>
+#include <string>
+#include <cstdint>
+
+#ifdef BUILD_POSIX
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+#include "TensorStream.h"
+#include "FloatTensorStream.h"
+#include "CompressedTensorStream.h"
+
+namespace Conv {
+
+TensorStream* TensorStream::FromFile(std::string path) {
+  std::ifstream input_stream(path, std::ios::in | std::ios::binary);
+  if(!input_stream.good()) {
+    FATAL("Cannot open file: " << path);
+  }
+  uint64_t magic = 0;
+
+  input_stream.read((char*)&magic, sizeof(uint64_t)/sizeof(char));
+  input_stream.close();
+
+  if(magic == CN24_CTS_MAGIC) {
+    LOGDEBUG << "Is compressed tensor, loading...";
+    CompressedTensorStream* cts = new CompressedTensorStream();
+    cts->LoadFile(path);
+    return cts;
+  } else {
+    LOGDEBUG << "Is float tensor, loading...";
+    FloatTensorStream* fts = new FloatTensorStream();
+    fts->LoadFile(path);
+    return fts;
+  }
+}
+
+}
\ No newline at end of file
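FromFile() is the single entry point for both on-disk formats: it reads the first eight bytes and dispatches on CN24_CTS_MAGIC, so callers never need to know how a tensor stream is encoded. A usage sketch; the path is a placeholder:

    // The caller only sees the abstract TensorStream interface.
    Conv::TensorStream* stream = Conv::TensorStream::FromFile("dataset.Tensor");
    LOGINFO << stream->GetTensorCount() << " tensors in stream";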
diff --git a/src/util/TensorStreamDataset.cpp b/src/util/TensorStreamDataset.cpp
index 6227b50..377e9c4 100644
--- a/src/util/TensorStreamDataset.cpp
+++ b/src/util/TensorStreamDataset.cpp
@@ -23,16 +23,24 @@
 #include "KITTIData.h"
 #include "ConfigParsing.h"
 
+#include "FloatTensorStream.h"
+
 namespace Conv {
 
-TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
+TensorStreamDataset::TensorStreamDataset (
+  /*
+  std::istream& training_stream,
   std::istream& testing_stream,
+  */
+  TensorStream* training_stream,
+  TensorStream* testing_stream,
   unsigned int classes, std::vector< std::string > class_names,
   std::vector<unsigned int> class_colors, std::vector<datum> class_weights,
   dataset_localized_error_function error_function, int training_fd, int testing_fd ) :
+  training_stream_(training_stream), testing_stream_(testing_stream),
   classes_ (classes), class_names_ (class_names), class_colors_ (class_colors),
   class_weights_(class_weights), error_function_ (error_function) {
@@ -43,21 +51,11 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
     FATAL ("Class count does not match class information count!");
   }
 
+  // Count tensors
   Tensor tensor;
-  while (!training_stream.eof()) {
-    tensor.Deserialize (training_stream, true);
-
-    if (tensor.elements() == 0)
-      break;
-
-    // LOGDEBUG << "Tensor " << tensor_count_training_ << ": " << tensor;
-    tensor_count_training_++;
-
-    training_stream.peek();
-  }
-
+  tensor_count_training_ = training_stream_->GetTensorCount();
   LOGDEBUG << tensor_count_training_ / 2 << " training tensors";
 
   // We need alternating label and image tensors, so we need an even count
@@ -65,18 +63,7 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
     FATAL ("Odd training tensor count!");
   }
 
-  while (!testing_stream.eof()) {
-    tensor.Deserialize (testing_stream, true);
-
-    if (tensor.elements() == 0)
-      break;
-
-    // LOGDEBUG << "Tensor " << tensor_count_testing_ << ": " << tensor;
-    tensor_count_testing_++;
-
-    testing_stream.peek();
-  }
-
+  tensor_count_testing_ = testing_stream->GetTensorCount();
   LOGDEBUG << tensor_count_testing_ / 2 << " testing tensors";
 
   if (tensor_count_testing_ & 1) {
@@ -85,56 +72,29 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
 
   tensors_ = (tensor_count_testing_ + tensor_count_training_) / 2;
 
-  // Reset streams
-  training_stream.clear();
-  testing_stream.clear();
-  training_stream.seekg (0, std::ios::beg);
-  testing_stream.seekg (0, std::ios::beg);
-
-  // Allocate arrays that depend on the tensor count
-  if (tensors_ > 0) {
-    data_ = new Tensor[tensors_];
-    labels_ = new Tensor[tensors_];
-  } else {
-    data_ = new Tensor[1];
-    labels_ = new Tensor[1];
-  }
-
   // Read tensors
   unsigned int e = 0;
   max_width_ = 0;
   max_height_ = 0;
 
   if((tensor_count_training_ + tensor_count_testing_) > 0) {
-    LOGINFO << "Deserializing " << (tensor_count_training_ + tensor_count_testing_) / 2 << " Tensors..." << std::endl << std::flush;
+    LOGINFO << "Loaded " << (tensor_count_training_ + tensor_count_testing_) / 2 << " Tensors.";
   }
 
   for (unsigned int t = 0; t < (tensor_count_training_ / 2); t++) {
-    data_[t].Deserialize (training_stream, false, true, training_fd);
-
-    if (data_[t].width() > max_width_)
-      max_width_ = data_[t].width();
-
-    if (data_[t].height() > max_height_)
-      max_height_ = data_[t].height();
-
-    labels_[t].Deserialize (training_stream, false, true, training_fd);
+    if(training_stream_->GetWidth(2*t) > max_width_)
+      max_width_ = training_stream_->GetWidth(2*t);
 
-    std::cout << "." << std::flush;
+    if(training_stream_->GetHeight(2*t) > max_height_)
+      max_height_ = training_stream_->GetHeight(2*t);
   }
 
   for (unsigned int t = (tensor_count_training_ / 2) ; t < tensors_; t++) {
-    data_[t].Deserialize (testing_stream, false, true, testing_fd);
-
-    if (data_[t].width() > max_width_)
-      max_width_ = data_[t].width();
-
-    if (data_[t].height() > max_height_)
-      max_height_ = data_[t].height();
-
-    labels_[t].Deserialize (testing_stream, false, true, testing_fd);
+    if(testing_stream_->GetWidth(2*t) > max_width_)
+      max_width_ = testing_stream_->GetWidth(2*t);
 
-    std::cout << "." << std::flush;
+    if(testing_stream_->GetHeight(2*t) > max_height_)
+      max_height_ = testing_stream_->GetHeight(2*t);
   }
 
   if (max_width_ & 1)
@@ -167,8 +127,13 @@ TensorStreamDataset::TensorStreamDataset (std::istream& training_stream,
   if (max_height_ & 32)
     max_height_+=32;
 
-  input_maps_ = data_[0].maps();
-  label_maps_ = labels_[0].maps();
+  if(training_stream_->GetTensorCount() > 0) {
+    input_maps_ = training_stream_->GetMaps(0);
+    label_maps_ = training_stream_->GetMaps(1);
+  } else {
+    input_maps_ = testing_stream_->GetMaps(0);
+    label_maps_ = testing_stream_->GetMaps(1);
+  }
 
   // Prepare error cache
   error_cache.Resize (1, max_width_, max_height_, 1);
@@ -233,37 +198,41 @@ bool TensorStreamDataset::SupportsTesting() const {
 
 bool TensorStreamDataset::GetTrainingSample (Tensor& data_tensor, Tensor& label_tensor, Tensor& helper_tensor, Tensor& weight_tensor, unsigned int sample, unsigned int index) {
   if (index < tensor_count_training_ / 2) {
     bool success = true;
-    success &= Tensor::CopySample (data_[index], 0, data_tensor, sample);
-    success &= Tensor::CopySample (labels_[index], 0, label_tensor, sample);
+    success &= training_stream_->CopySample(2 * index, 0, data_tensor, sample);
+    success &= training_stream_->CopySample(2 * index + 1, 0, label_tensor, sample);
+
+    unsigned int data_width = training_stream_->GetWidth(2 * index);
+    unsigned int data_height = training_stream_->GetHeight(2 * index);
 
     // Write spatial prior data to helper tensor
-    for (unsigned int y = 0; y < data_[index].height(); y++) {
-      for (unsigned int x = 0; x < data_[index].width(); x++) {
-        *helper_tensor.data_ptr(x, y, 0, sample) = ((datum)x) / ((datum)data_[index].width() - 1);
-        *helper_tensor.data_ptr(x, y, 1, sample) = ((datum)y) / ((datum)data_[index].height() - 1);
+    for (unsigned int y = 0; y < data_height; y++) {
+      for (unsigned int x = 0; x < data_width; x++) {
+        *helper_tensor.data_ptr(x, y, 0, sample) = ((datum)x) / ((datum)data_width - 1);
+        *helper_tensor.data_ptr(x, y, 1, sample) = ((datum)y) / ((datum)data_height - 1);
       }
-      for (unsigned int x = data_[index].width(); x < GetWidth(); x++) {
+      for (unsigned int x = data_width; x < GetWidth(); x++) {
         *helper_tensor.data_ptr(x, y, 0, sample) = 0;
         *helper_tensor.data_ptr(x, y, 1, sample) = 0;
       }
     }
-    for (unsigned int y = data_[index].height(); y < GetHeight(); y++) {
+    for (unsigned int y = data_height; y < GetHeight(); y++) {
       for (unsigned int x = 0; x < GetWidth(); x++) {
         *helper_tensor.data_ptr(x, y, 0, sample) = 0;
         *helper_tensor.data_ptr(x, y, 1, sample) = 0;
       }
     }
 
-    //if (data_[index].width() == GetWidth() && data_[index].height() == GetHeight()) {
+    //if (data_width == GetWidth() && data_height == GetHeight()) {
     //  success &= Tensor::CopySample (error_cache, 0, weight_tensor, sample);
     //} else {
     // Reevaluate error function
     weight_tensor.Clear (0.0, sample);
 
-    for (unsigned int y = 0; y < data_[index].height(); y++) {
-      for (unsigned int x = 0; x < data_[index].width(); x++) {
+    #pragma omp parallel for default(shared)
+    for (unsigned int y = 0; y < data_height; y++) {
+      for (unsigned int x = 0; x < data_width; x++) {
         const datum class_weight = class_weights_[label_tensor.PixelMaximum(x, y, sample)];
-        *weight_tensor.data_ptr (x, y, 0, sample) = error_function_ (x, y, data_[index].width(), data_[index].height()) * class_weight;
+        *weight_tensor.data_ptr (x, y, 0, sample) = error_function_ (x, y, data_width, data_height) * class_weight;
       }
     }
     //}
@@ -275,38 +244,41 @@ bool TensorStreamDataset::GetTrainingSample (Tensor& data_tensor, Tensor& label_
 bool TensorStreamDataset::GetTestingSample (Tensor& data_tensor, Tensor& label_tensor, Tensor& helper_tensor, Tensor& weight_tensor, unsigned int sample, unsigned int index) {
   if (index < tensor_count_testing_ / 2) {
     bool success = true;
-    unsigned int test_index = (tensor_count_training_ / 2) + index;
-    success &= Tensor::CopySample (data_[test_index], 0, data_tensor, sample);
-    success &= Tensor::CopySample (labels_[test_index], 0, label_tensor, sample);
+    success &= testing_stream_->CopySample(2 * index, 0, data_tensor, sample);
+    success &= testing_stream_->CopySample(2 * index + 1, 0, label_tensor, sample);
+
+    unsigned int data_width = testing_stream_->GetWidth(2 * index);
+    unsigned int data_height = testing_stream_->GetHeight(2 * index);
 
     // Write spatial prior data to helper tensor
-    for (unsigned int y = 0; y < data_[test_index].height(); y++) {
-      for (unsigned int x = 0; x < data_[test_index].width(); x++) {
-        *helper_tensor.data_ptr(x, y, 0, sample) = ((datum)x) / ((datum)data_[test_index].width() - 1);
-        *helper_tensor.data_ptr(x, y, 1, sample) = ((datum)y) / ((datum)data_[test_index].height() - 1);
+    for (unsigned int y = 0; y < data_height; y++) {
+      for (unsigned int x = 0; x < data_width; x++) {
+        *helper_tensor.data_ptr(x, y, 0, sample) = ((datum)x) / ((datum)data_width - 1);
+        *helper_tensor.data_ptr(x, y, 1, sample) = ((datum)y) / ((datum)data_height - 1);
       }
-      for (unsigned int x = data_[test_index].width(); x < GetWidth(); x++) {
+      for (unsigned int x = data_width; x < GetWidth(); x++) {
        *helper_tensor.data_ptr(x, y, 0, sample) = 0;
         *helper_tensor.data_ptr(x, y, 1, sample) = 0;
       }
     }
-    for (unsigned int y = data_[test_index].height(); y < GetHeight(); y++) {
+    for (unsigned int y = data_height; y < GetHeight(); y++) {
       for (unsigned int x = 0; x < GetWidth(); x++) {
         *helper_tensor.data_ptr(x, y, 0, sample) = 0;
         *helper_tensor.data_ptr(x, y, 1, sample) = 0;
       }
     }
 
-    //if (data_[test_index].width() == GetWidth() && data_[test_index].height() == GetHeight()) {
+    //if (data_width == GetWidth() && data_height == GetHeight()) {
    //  success &= Tensor::CopySample (error_cache, 0, weight_tensor, sample);
     //} else {
     // Reevaluate error function
     weight_tensor.Clear (0.0, sample);
 
-    for (unsigned int y = 0; y < data_[test_index].height(); y++) {
-      for (unsigned int x = 0; x < data_[test_index].width(); x++) {
+    #pragma omp parallel for default(shared)
+    for (unsigned int y = 0; y < data_height; y++) {
+      for (unsigned int x = 0; x < data_width; x++) {
         const datum class_weight = class_weights_[label_tensor.PixelMaximum(x, y, sample)];
-        *weight_tensor.data_ptr (x, y, 0, sample) = error_function_ (x, y, data_[test_index].width(), data_[test_index].height()) * class_weight;
+        *weight_tensor.data_ptr (x, y, 0, sample) = error_function_ (x, y, data_width, data_height) * class_weight;
       }
     }
     //}
@@ -326,6 +298,9 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
   int training_fd = 0;
   int testing_fd = 0;
   bool no_mmap = false;
+
+  TensorStream* training_stream = new FloatTensorStream();
+  TensorStream* testing_stream = new FloatTensorStream();
 
   file.clear();
   file.seekg (0, std::ios::beg);
@@ -403,39 +378,14 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
   LOGDEBUG << "Training tensor: " << training_file;
   LOGDEBUG << "Testing tensor: " << testing_file;
 
-  std::istream* training_stream = nullptr;
-  std::istream* testing_stream = nullptr;
-
   if (!dont_load && (selection == LOAD_BOTH || selection == LOAD_TRAINING_ONLY) && training_file.length() > 0) {
-    training_stream = new std::ifstream (training_file, std::ios::in | std::ios::binary);
-    if(!training_stream->good()) {
-      FATAL("Failed to load " << training_file << "!");
-    }
-#ifdef BUILD_POSIX
-    if(!no_mmap)
-      training_fd = open(training_file.c_str(), O_RDONLY);
-    if(training_fd < 0) {
-      FATAL("Failed to load " << training_file << "!");
-    }
-#endif
+    training_stream = TensorStream::FromFile(training_file);
   } else {
-    training_stream = new std::istringstream();
   }
 
   if (!dont_load && (selection == LOAD_BOTH || selection == LOAD_TESTING_ONLY) && testing_file.length() > 0) {
-    testing_stream = new std::ifstream (testing_file, std::ios::in | std::ios::binary);
-    if(!testing_stream->good()) {
-      FATAL("Failed to load " << testing_file << "!");
-    }
-#ifdef BUILD_POSIX
-    if(!no_mmap)
-      testing_fd = open(training_file.c_str(), O_RDONLY);
-    if(testing_fd < 0) {
-      FATAL("Failed to load " << testing_file << "!");
-    }
-#endif
+    testing_stream = TensorStream::FromFile(testing_file);
   } else {
-    testing_stream = new std::istringstream();
   }
 
   if (class_weights.size() != classes) {
@@ -443,7 +393,7 @@ TensorStreamDataset* TensorStreamDataset::CreateFromConfiguration (std::istream&
     class_weights.push_back(1.0);
   }
 
-  return new TensorStreamDataset (*training_stream, *testing_stream, classes,
+  return new TensorStreamDataset (training_stream, testing_stream, classes,
     class_names, class_colors, class_weights, error_function, training_fd, testing_fd);
 }
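The compressTensorStream tool below converts a float tensor stream into the RLE-compressed format and verifies the round trip as it goes. The underlying invariant is that Compress()/Decompress() are lossless inverses; a sketch of that round trip, with the tensor dimensions chosen arbitrarily:

    Conv::Tensor tensor(1, 64, 64, 3);   // dimensions are placeholders
    tensor.Clear(0.0, 0);                // constant data is the best case for RLE

    Conv::CompressedTensor ctensor;
    ctensor.Compress(tensor);            // encode
    ctensor.Decompress(tensor);          // decode back in place
    // tensor.elements() is unchanged; the byte counts before and after must
    // match exactly, which is the same check the tool performs per tensor.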
diff --git a/tools/compressTensorStream.cpp b/tools/compressTensorStream.cpp
new file mode 100644
index 0000000..fc02079
--- /dev/null
+++ b/tools/compressTensorStream.cpp
@@ -0,0 +1,72 @@
+/*
+ * This file is part of the CN24 semantic segmentation software,
+ * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com).
+ *
+ * For licensing information, see the LICENSE file included with this project.
+ */
+
+#include <cn24.h>
+
+#include <fstream>
+#include <cstdint>
+
+int main(int argc, char** argv) {
+  Conv::System::Init();
+
+  if(argc != 3) {
+    LOGERROR << "USAGE: " << argv[0] << " <input> <output>";
+  }
+
+  std::string input_file_name(argv[1]);
+  std::string output_file_name(argv[2]);
+
+  std::ifstream input_tensor_stream(input_file_name, std::ios::in | std::ios::binary);
+  std::ofstream output_tensor_stream(output_file_name, std::ios::out | std::ios::binary);
+
+  if(!input_tensor_stream.good())
+    FATAL("Cannot open " << input_file_name);
+
+  if(!output_tensor_stream.good())
+    FATAL("Cannot open " << output_file_name);
+
+  long uncompressed_total = 0;
+  long compressed_total = 0;
+
+  Conv::Tensor tensor;
+
+  uint64_t magic = CN24_CTS_MAGIC;
+  output_tensor_stream.write((char*)&magic, sizeof(uint64_t)/sizeof(char));
+
+  while(!input_tensor_stream.eof()) {
+    tensor.Deserialize(input_tensor_stream);
+
+    LOGDEBUG << "Input tensor: " << tensor;
+
+    unsigned int original_size = tensor.elements() * sizeof(Conv::datum)/sizeof(char);
+    LOGDEBUG << "Size: " << original_size;
+
+    Conv::CompressedTensor ctensor;
+    ctensor.Compress(tensor);
+
+    ctensor.Serialize(output_tensor_stream);
+
+    LOGDEBUG << "RLE Size: " << ctensor.compressed_length();
+
+    ctensor.Decompress(tensor);
+    unsigned int bytes_out = tensor.elements() * sizeof(Conv::datum)/sizeof(char);
+
+    if(bytes_out != original_size) {
+      FATAL("Size mismatch! Expected: " << original_size << ", actual: " << bytes_out);
+    }
+
+    LOGINFO << "Ratio: " << 100.0 * (double)ctensor.compressed_length() / (double)original_size << "%" << std::flush;
+    compressed_total += ctensor.compressed_length();
+    uncompressed_total += tensor.elements() * sizeof(Conv::datum)/sizeof(char);
+
+    input_tensor_stream.peek();
+  }
+  LOGINFO << "Overall ratio: " << 100.0 * (double)compressed_total / (double)uncompressed_total << "%";
+  LOGINFO << "Uncompressed: " << uncompressed_total;
+  LOGINFO << "Compressed  : " << compressed_total;
+  LOGEND;
+}
\ No newline at end of file
diff --git a/tools/makeCompressedTensorStream.cpp b/tools/makeCompressedTensorStream.cpp
new file mode 100644
index 0000000..1f9d597
--- /dev/null
+++ b/tools/makeCompressedTensorStream.cpp
@@ -0,0 +1,195 @@
+/*
+ * This file is part of the CN24 semantic segmentation software,
+ * copyright (C) 2015 Clemens-Alexander Brust (ikosa dot de at gmail dot com).
+ *
+ * For licensing information, see the LICENSE file included with this project.
+ */
+/**
+ * @file makeCompressedTensorStream.cpp
+ * @brief Tool to import datasets
+ *
+ * @author Clemens-Alexander Brust (ikosa dot de at gmail dot com)
+ */
+
+#include <fstream>
+#include <string>
+
+#include <cn24.h>
+
+int main ( int argc, char** argv ) {
+  if ( argc < 8 ) {
+    LOGERROR << "USAGE: " << argv[0] << "