diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..aeab35a --- /dev/null +++ b/.envrc @@ -0,0 +1,5 @@ +# shellcheck shell=bash +if ! has nix_direnv_version || ! nix_direnv_version 3.0.6; then + source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/3.0.6/direnvrc" "sha256-RYcUJaRMf8oF5LznDrlCXbkOQrywm0HDv1VjYGaJGdM=" +fi +use flake . --impure diff --git a/.gitignore b/.gitignore index fdd1b26..f72a890 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,8 @@ cmake_install.cmake Makefile CMakeFiles build + +*.code-workspace +/.vscode +.direnv/ +/result diff --git a/CMakeLists.txt b/CMakeLists.txt index f0f4547..8857a6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,22 +1,59 @@ cmake_minimum_required(VERSION 3.20) -project(ply2image) +project(ply2image LANGUAGES CXX CUDA) set(DEFAULT_BUILD_TYPE "Release") -if(MSVC) - add_compile_options(/W4 /WX) -else() - add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Werror) -endif() - find_package(fmt REQUIRED) find_package(PNG REQUIRED) find_package(argparse REQUIRED) +find_package(CUDAToolkit REQUIRED) -add_executable(${PROJECT_NAME} "${CMAKE_SOURCE_DIR}/src/main.cpp") - +add_executable(${PROJECT_NAME} +"${CMAKE_SOURCE_DIR}/src/main.cpp" +"${CMAKE_SOURCE_DIR}/src/ply2image/argparse.cpp" +"${CMAKE_SOURCE_DIR}/src/ply2image/progress_bar.cpp" +"${CMAKE_SOURCE_DIR}/src/ply2image/cuda/tmrp.cu" +) target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_23) target_link_libraries(${PROJECT_NAME} fmt::fmt) target_link_libraries(${PROJECT_NAME} PNG::PNG) target_link_libraries(${PROJECT_NAME} argparse::argparse) + +target_include_directories(${PROJECT_NAME} PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + +function(CUDA_CONVERT_FLAGS EXISTING_TARGET) + get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS) + if(NOT "${old_flags}" STREQUAL "") + string(REPLACE ";" "," CUDA_flags "${old_flags}") + set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS + "$<$>:${old_flags}>$<$>:-Xcompiler=${CUDA_flags}>" + ) + endif() +endfunction() + +if(MSVC) + target_compile_options(${PROJECT_NAME} INTERFACE /W4 ) + # target_link_options(${PROJECT_NAME} PUBLIC /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrtd.lib) # release dll + # target_link_options(${PROJECT_NAME} PUBLIC /NODEFAULTLIB:msvcrt.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrtd.lib) # release static + target_link_options(${PROJECT_NAME} PUBLIC /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrt.lib) # debug dll + # target_link_options(${PROJECT_NAME} PUBLIC /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrt.lib) # debug static + # target_link_options(${PROJECT_NAME} PUBLIC /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:msvcrtd.lib) # debug static release dll +else() + target_compile_options(${PROJECT_NAME} INTERFACE -Wall -Wextra -Wpedantic -Wconversion -Werror) +endif() + +# include the next line for a CUDA debug build +# target_compile_options(${PROJECT_NAME} PRIVATE $<$:--generate-line-info -G>) +target_compile_options(${PROJECT_NAME} PRIVATE $<$:-maxrregcount=64>) +target_compile_options(${PROJECT_NAME} PRIVATE $<$:-O3>) + +CUDA_CONVERT_FLAGS(${PROJECT_NAME}) + +if(APPLE) + # We need to add the path to the driver (libcuda.dylib) as an rpath, + # so that the static cuda runtime can find it at runtime. + set_property(TARGET ply2image + PROPERTY + BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/LICENSE.txt b/LICENSE.txt index ddc0498..ee25320 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,7 @@ BSD 4-Clause License with Paper Attribution Copyright [2023] [Benjamin Buch, Christina Junger and Gunther Notni] +Copyright [2025] [Julian Kuners] All rights reserved. @@ -19,7 +20,7 @@ are permitted provided that the following conditions are met: specific prior written permission. 4. Redistributions of any form whatsoever must retain the following acknowledgment: - "This product includes software developed by Benjamin Buch, Christina Junger and Gunther Notni." + "This product includes software developed by Benjamin Buch, Christina Junger, Gunther Notni, and Julian Kuners." 5. If you use this software in your own work, research, or project, you must cite the following paper: diff --git a/README.md b/README.md index cfbb0ab..46d3ed4 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,51 @@ By default, the output image is stored in BBF file format with 64-bit floating p Saving as PNG is lossy! The output is always a 16 bit grayscale image with alpha channel. The pixel values range is truncated to 0 to 65535, no overflow or underflow takes place! All pixel values are rounded half up to integers. Fixed point values can be emulated via the value scaling. For example, to emulate 4 binary decimal places, the scaling must be set to 16 (=2^4). However, this information is not stored in the image! So when reading the PNG file later, you have to take care by yourself to interpret the values as fixed-point numbers again! +By appending `--cuda`, a CUDA accelerated implementation of the algorithm is dispatched on a Nvidia GPU. + ## Required libraries See [libraries](doc/setup.md) -## Usage +## How to build + +[CMake](https://gitlab.kitware.com/cmake/cmake) is used to build the project. Additionally, it is recommended to use [Nixpkgs](https://github.com/NixOS/nixpkgs) to deploy dependencies. +### How to install Nix +Either use the [NixOS](https://nixos.org/) Linux distribution or install Nix directly on your Linux distribution of choice. You may need to start the daemon or simply restart the system. +```bash +sh <(curl -L https://nixos.org/nix/install) --daemon ``` + +### How to build in a Nix development shell +A Nix development shell can be used to build the project. Additionally, the use of [nix-direnv](https://github.com/nix-community/nix-direnv) allows for easy integration into the terminal or an IDE such as VSCode. The development shell must be invoked impure due to the [NixGL](https://github.com/nix-community/nixGL) dependency that resolves the corresponding Nvidia driver dynamic libraries from Nixpkgs. +```bash +nix develop --impure # enter the nix develop shell mkdir build cd build -cmake .. -make +cmake -DCMAKE_CUDA_ARCHITECTURES=75 .. +make -j +./ply2image --help +exit # exit the nix develop shell ``` +### How to build and install with Nix +This project is packaged using Nix flakes and Nixpkgs. Run the following to build the project: +```bash +nix build ``` -./ply2image --help +The result will be located in the directory `result`. Please note that Nix built programs are meant to be distributed as packages and not as standalone binaries. This is due to the pure way dynamic libraries are resolved at runtime in Nix. + +Run the following to install the project: +```bash +nix profile install +``` + +Additionally, on non-NixOS Linux distributions, the dynamic libraries of the Nvidia driver must be installed from Nixpkgs. Otherwise, a stub implementation is used that simply won't find Nvidia devices at runtime. This can be done by running the following in the project root directory: +```bash +sudo nix build --out-link /run/opengl-driver .#nvidia --impure ``` +Please note that this must be repeated every time the Nvidia driver is updated. ## Licence diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..533ee42 --- /dev/null +++ b/default.nix @@ -0,0 +1,69 @@ +{ + lib, + stdenv, + autoAddDriverRunpath, + + cmake, + + fmt, + zlib, + pngpp, + argparse, + boost, + + cudaPackages, +}: + +stdenv.mkDerivation { + pname = "ply2image"; + version = "1.0.0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions [ + ./src + ./CMakeLists.txt + ]; + }; + + + nativeBuildInputs = [ + # build tools and compilers + cmake + # hooks + autoAddDriverRunpath # add impure nvidia driver dynamic libraries to RPATH + ]; + + buildInputs = [ + # CUDA compiler and libraries + cudaPackages.cuda_nvcc + cudaPackages.cuda_cudart + cudaPackages.cuda_cccl + # C++ libraries + fmt + zlib + pngpp + argparse + boost + ]; + + cmakeFlags = [ + "-DCMAKE_CUDA_ARCHITECTURES=75" + ]; + + installPhase = '' + mkdir $out + install -D ply2image -t $out/bin/ + ''; + + meta = with lib; { + description = "Triangle-Mesh-Rasterization-Projection"; + homepage = "https://github.com/QBV-tu-ilmenau/Triangle-Mesh-Rasterization-Projection"; + license = { + shortName = "bsdOriginalPaperAttribution"; + free = true; + fullName = "BSD 4-Clause License with Paper Attribution"; + }; + mainProgram = "ply2image"; + }; +} \ No newline at end of file diff --git a/doc/cuda.md b/doc/cuda.md new file mode 100644 index 0000000..1ede0f3 --- /dev/null +++ b/doc/cuda.md @@ -0,0 +1,32 @@ +# CUDA strategy + +Problem is compute bound and heavily uses FP64 instructions. + +2 different cases of execution: +- without a reference filter +- with a reference filter, either min or max + +#### Case 1: without a reference filter +kernels: +- (1) raster_interpolation + - load raster points + - create triangles + - perform triangle interpolation + - recude & keep 1 result per pixel in global memory +- (2) filter_averaging_none: + - finish calculating average per pixel + +#### Case 2: with a reference filter +kernels: +- (1) init_pixel_array_header: + - init headers of result array + - mixed async array/linked array list +- (1) raster_interpolation: + - load raster points + - create triangles + - perform triangle interpolation + - record results in global memory +- (2) filter_averaging_min_max: + - load interpolation results + - perform raster filtering + - calculate average diff --git a/doc/setup.md b/doc/setup.md index d00748a..b1d319d 100644 --- a/doc/setup.md +++ b/doc/setup.md @@ -21,3 +21,9 @@ git clone https://github.com/QBV-tu-ilmenau/Triangle-Mesh-Rasterization-Projecti - [libpng](http://www.libpng.org/pub/png/libpng.html) - No longer necessary: Now the [bitmap](https://github.com/bebuch/bitmap) lib is part of the project. + +### CUDA + +- [NVCC](https://docs.nvidia.com/cuda/) +- [CUDA Runtime](https://docs.nvidia.com/cuda/) +- [CCCL](https://github.com/NVIDIA/cccl) diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..552d252 --- /dev/null +++ b/flake.lock @@ -0,0 +1,111 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1726560853, + "narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixgl": { + "inputs": { + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1713543440, + "narHash": "sha256-lnzZQYG0+EXl/6NkGpyIz+FEOc/DSEG57AP1VsdeNrM=", + "owner": "nix-community", + "repo": "nixGL", + "rev": "310f8e49a149e4c9ea52f1adf70cdc768ec53f8a", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nixGL", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1660551188, + "narHash": "sha256-a1LARMMYQ8DPx1BgoI/UN4bXe12hhZkCNqdxNi6uS0g=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "441dc5d512153039f19ef198e662e4f3dbb9fd65", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1729265718, + "narHash": "sha256-4HQI+6LsO3kpWTYuVGIzhJs1cetFcwT7quWCk/6rqeo=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ccc0c2126893dd20963580b6478d1a10a4512185", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixgl": "nixgl", + "nixpkgs": "nixpkgs_2" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..a35f6f1 --- /dev/null +++ b/flake.nix @@ -0,0 +1,48 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + nixgl.url = "github:nix-community/nixGL"; + }; + + outputs = { nixpkgs, flake-utils, nixgl, ... }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + config = { + allowUnfree = true; + }; + overlays = [ nixgl.overlay ]; + }; + in { + devShells.default = pkgs.mkShell { + packages = with pkgs; [ + # build tools and compilers + gnumake + cmake + clang_17 + # CUDA compiler and libraries + cudaPackages.cuda_nvcc + cudaPackages.cuda_cudart + cudaPackages.cuda_cccl + # C++ libraries + fmt + zlib + pngpp + argparse + boost + ]; + + shellHook = '' + export LD_LIBRARY_PATH=$(${pkgs.nixgl.auto.nixGLDefault}/bin/nixGL printenv LD_LIBRARY_PATH):$LD_LIBRARY_PATH + ''; + }; + packages = { + default = pkgs.callPackage ./default.nix { }; + # sudo nix build --out-link /run/opengl-driver .#nvidia --impure + nvidia = pkgs.nixgl.auto.nvidiaDrivers.out; + }; + } + ); +} diff --git a/src/binary_read.hpp b/src/bitmap/binary_read.hpp similarity index 100% rename from src/binary_read.hpp rename to src/bitmap/binary_read.hpp diff --git a/src/binary_write.hpp b/src/bitmap/binary_write.hpp similarity index 100% rename from src/binary_write.hpp rename to src/bitmap/binary_write.hpp diff --git a/src/bitmap.hpp b/src/bitmap/bitmap.hpp similarity index 100% rename from src/bitmap.hpp rename to src/bitmap/bitmap.hpp diff --git a/src/bitmap_io.hpp b/src/bitmap/bitmap_io.hpp similarity index 100% rename from src/bitmap_io.hpp rename to src/bitmap/bitmap_io.hpp diff --git a/src/detail/binary_io_flags.hpp b/src/bitmap/detail/binary_io_flags.hpp similarity index 100% rename from src/detail/binary_io_flags.hpp rename to src/bitmap/detail/binary_io_flags.hpp diff --git a/src/detail/io.hpp b/src/bitmap/detail/io.hpp similarity index 100% rename from src/detail/io.hpp rename to src/bitmap/detail/io.hpp diff --git a/src/detail/valid_binary_format.hpp b/src/bitmap/detail/valid_binary_format.hpp similarity index 100% rename from src/detail/valid_binary_format.hpp rename to src/bitmap/detail/valid_binary_format.hpp diff --git a/src/exception.hpp b/src/bitmap/exception.hpp similarity index 100% rename from src/exception.hpp rename to src/bitmap/exception.hpp diff --git a/src/get_size.hpp b/src/bitmap/get_size.hpp similarity index 100% rename from src/get_size.hpp rename to src/bitmap/get_size.hpp diff --git a/src/histogram.hpp b/src/bitmap/histogram.hpp similarity index 100% rename from src/histogram.hpp rename to src/bitmap/histogram.hpp diff --git a/src/interpolate.hpp b/src/bitmap/interpolate.hpp similarity index 100% rename from src/interpolate.hpp rename to src/bitmap/interpolate.hpp diff --git a/src/image_format_png.hpp b/src/bitmap/io/image_format_png.hpp similarity index 98% rename from src/image_format_png.hpp rename to src/bitmap/io/image_format_png.hpp index 0970796..2cb5c03 100644 --- a/src/image_format_png.hpp +++ b/src/bitmap/io/image_format_png.hpp @@ -1,8 +1,10 @@ #pragma once -#include "bitmap/bitmap.hpp" -#include "bitmap/pixel.hpp" -#include "bitmap/masked_pixel.hpp" +// this is a modified version of image_format_png found in lib bitmap + +#include "../bitmap.hpp" +#include "../pixel.hpp" +#include "../masked_pixel.hpp" #include diff --git a/src/masked_pixel.hpp b/src/bitmap/masked_pixel.hpp similarity index 100% rename from src/masked_pixel.hpp rename to src/bitmap/masked_pixel.hpp diff --git a/src/matrix3x3.hpp b/src/bitmap/matrix3x3.hpp similarity index 100% rename from src/matrix3x3.hpp rename to src/bitmap/matrix3x3.hpp diff --git a/src/pixel.hpp b/src/bitmap/pixel.hpp similarity index 100% rename from src/pixel.hpp rename to src/bitmap/pixel.hpp diff --git a/src/pixel_algorithm.hpp b/src/bitmap/pixel_algorithm.hpp similarity index 100% rename from src/pixel_algorithm.hpp rename to src/bitmap/pixel_algorithm.hpp diff --git a/src/pixel_output.hpp b/src/bitmap/pixel_output.hpp similarity index 100% rename from src/pixel_output.hpp rename to src/bitmap/pixel_output.hpp diff --git a/src/point.hpp b/src/bitmap/point.hpp similarity index 100% rename from src/point.hpp rename to src/bitmap/point.hpp diff --git a/src/point_io.hpp b/src/bitmap/point_io.hpp similarity index 100% rename from src/point_io.hpp rename to src/bitmap/point_io.hpp diff --git a/src/rect.hpp b/src/bitmap/rect.hpp similarity index 100% rename from src/rect.hpp rename to src/bitmap/rect.hpp diff --git a/src/rect_io.hpp b/src/bitmap/rect_io.hpp similarity index 100% rename from src/rect_io.hpp rename to src/bitmap/rect_io.hpp diff --git a/src/rect_transform.hpp b/src/bitmap/rect_transform.hpp similarity index 100% rename from src/rect_transform.hpp rename to src/bitmap/rect_transform.hpp diff --git a/src/size.hpp b/src/bitmap/size.hpp similarity index 100% rename from src/size.hpp rename to src/bitmap/size.hpp diff --git a/src/size_io.hpp b/src/bitmap/size_io.hpp similarity index 100% rename from src/size_io.hpp rename to src/bitmap/size_io.hpp diff --git a/src/subbitmap.hpp b/src/bitmap/subbitmap.hpp similarity index 100% rename from src/subbitmap.hpp rename to src/bitmap/subbitmap.hpp diff --git a/src/transform.hpp b/src/bitmap/transform.hpp similarity index 100% rename from src/transform.hpp rename to src/bitmap/transform.hpp diff --git a/src/io/image_format_png.hpp b/src/io/image_format_png.hpp deleted file mode 100644 index 270b50f..0000000 --- a/src/io/image_format_png.hpp +++ /dev/null @@ -1,384 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - - -namespace bmp::png{ - - - struct g1{ - using value_type = bool; - static constexpr int channels = PNG_COLOR_TYPE_GRAY; - static constexpr int bit_depth = 1; - - value_type g; - }; - - struct g8{ - using value_type = std::uint8_t; - static constexpr int channels = PNG_COLOR_TYPE_GRAY; - static constexpr int bit_depth = 8; - - std::uint8_t g; - }; - - struct g16{ - using value_type = std::uint16_t; - static constexpr int channels = PNG_COLOR_TYPE_GRAY; - static constexpr int bit_depth = 16; - - std::uint16_t g; - }; - - struct ga8{ - using value_type = std::uint8_t; - static constexpr int channels = PNG_COLOR_TYPE_GRAY_ALPHA; - static constexpr int bit_depth = 8; - - std::uint8_t g; - std::uint8_t a; - }; - - struct ga16{ - using value_type = std::uint16_t; - static constexpr int channels = PNG_COLOR_TYPE_GRAY_ALPHA; - static constexpr int bit_depth = 16; - - std::uint16_t g; - std::uint16_t a; - }; - - struct rgb8{ - using value_type = std::uint8_t; - static constexpr int channels = PNG_COLOR_TYPE_RGB; - static constexpr int bit_depth = 8; - - std::uint8_t r; - std::uint8_t g; - std::uint8_t b; - }; - - struct rgb16{ - using value_type = std::uint16_t; - static constexpr int channels = PNG_COLOR_TYPE_RGB; - static constexpr int bit_depth = 16; - - std::uint16_t r; - std::uint16_t g; - std::uint16_t b; - }; - - struct rgba8{ - using value_type = std::uint8_t; - static constexpr int channels = PNG_COLOR_TYPE_RGB_ALPHA; - static constexpr int bit_depth = 8; - - std::uint8_t r; - std::uint8_t g; - std::uint8_t b; - std::uint8_t a; - }; - - struct rgba16{ - using value_type = std::uint16_t; - static constexpr int channels = PNG_COLOR_TYPE_RGB_ALPHA; - static constexpr int bit_depth = 16; - - std::uint16_t r; - std::uint16_t g; - std::uint16_t b; - std::uint16_t a; - }; - - - template - concept is_same_as_any_of = (std::same_as || ...); - - template - concept write_pixel = is_same_as_any_of; - - template - concept g_pixel = is_same_as_any_of; - - template - concept ga_pixel = is_same_as_any_of; - - template - concept rgb_pixel = is_same_as_any_of; - - template - concept rgba_pixel = is_same_as_any_of; - - - template - struct access_g{ - using target_type = To; - - template - constexpr To operator()(From const& pixel)const{ - return To(static_cast(pixel)); - } - } - - template - struct access_ga{ - using target_type = To; - - template - constexpr To operator()(From const& pixel)const{ - return To( - static_cast(pixel.g), - static_cast(pixel.a)); - } - } - - template template - constexpr To access_ga::operator()>(basic_masked_pixel const& pixel)const{ - return To( - static_cast(pixel.v), - pixel.m ? 0 : std::numeric_limits::max()); - } - - template - struct access_rgb{ - using target_type = To; - - template - constexpr To operator()(From const& pixel)const{ - return To( - static_cast(pixel.r), - static_cast(pixel.g), - static_cast(pixel.b)); - } - } - - template - struct access_rgba{ - using target_type = To; - - template - constexpr To operator()(From const& pixel)const{ - return To( - static_cast(pixel.r), - static_cast(pixel.g), - static_cast(pixel.b), - static_cast(pixel.a)); - } - } - - template template - constexpr To access_rgba::operator()>(basic_masked_pixel const& pixel)const{ - return To( - static_cast(pixel.v), - pixel.m ? 0 : std::numeric_limits::max()); - } - - - struct pixel_type_not_supported_by_png; - - - template inline constexpr auto pixel_access = pixel_type_not_supported_by_png{}; - - template <> inline constexpr auto pixel_access = access_g{}; - - template <> inline constexpr auto pixel_access = access_g{}; - template <> inline constexpr auto pixel_access = access_g{}; - template <> inline constexpr auto pixel_access = access_g{}; - template <> inline constexpr auto pixel_access = access_g{}; - - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - - template <> inline constexpr auto pixel_access = access_rgb{}; - template <> inline constexpr auto pixel_access = access_rgb{}; - template <> inline constexpr auto pixel_access = access_rgb{}; - template <> inline constexpr auto pixel_access = access_rgb{}; - - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - template <> inline constexpr auto pixel_access = access_ga{}; - - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - template <> inline constexpr auto pixel_access = access_rgba{}; - - - class writer{ - public: - writer() = default; - - writer(writer const&) = delete; - writer& operator=(writer const&) = delete; - - ~writer()noexcept{ - reset(); - } - - template - bool write(bitmap const& image, std::filesystem::path const& filepath){ - std::ofstream os(filepath, std::ios::binary); - if(!os.open()){ - throw std::runtime_error(std::format("can not open file {:s}", std::quoted(filepath.string()))); - } - - return write(image, os); - } - - template - bool write(bitmap const& image, std::ostream& os)noexcept{ - static constexpr auto max_size = static_cast(std::numeric_limits::max()); - if(image.h() > max_size || image.h() > max_size){ - if(error_callable_){ - error_callable_("dimensions are to large for PNG file format"sv); - } - return false; - } - - reset(); - - main_ = ::png_create_write_struct(PNG_LIBPNG_VER_STRING, this, &error, &warn); - if(!main_){ - if(error_callable_){ - error_callable_("failed to allocate png struct"sv); - } - return false; - } - - info_ = ::png_create_info_struct(main_); - if(!info_){ - if(error_callable_){ - error_callable_("failed to allocate png info struct"sv); - } - return false; - } - - // libpng will jump to this condition if an error occurs - if(std::setjmp(::png_jmpbuf(main_))){ - return false; - } - - ::png_set_write_fn(main_, &os, &write_data, &flush_data); - - auto const w = image.w_as() - auto const h = image.h_as() - - using pixel_type = pixel_access::target_type; - ::png_set_IHDR(main_, info_, w, h, pixel_type::bit_depth, pixel_type::channels, - PNG_INTERLACE_ADAM7, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT) - - // TODO: Implement some direct access instead of copy for types which support this - std::vector data; - data.reserve(image.point_count()); - std::ranges::transform(image, std::back_inserter(data), - [](T const& v){ - return pixel_access(v); - }); - - std::vector rows; - rows.reserve(h); - for(std::size_t y = 0; y < h; ++y){ - rows.push_back(&data[y * w]); - } - - ::png_set_rows(main_, info_, rows.data()); - - ::png_write_png(main_, info_, PNG_TRANSFORM_IDENTITY, nullptr); - - return true; - } - - void on_error(std::function callable)noexcept{ - error_callable_ = std::move(callable); - } - - void on_warning(std::function callable)noexcept{ - warning_callable_ = std::move(callable); - } - - private: - void reset(){ - png_destroy_write_struct(&main_, &info_); - - main_ = nullptr; - info_ = nullptr; - } - - static [[noreturn]] void error(::png_struct* main, char const* message)noexcept{ - auto const& callable = static_cast(::png_get_error_ptr(main))->error_callable_; - if(callable){ - callable(std::string_view(message)); - } - - // libpng requires the error handler to jump back to our write routine - std::longjmp(::png_jmpbuf(main), -1); - } - - static void warn(::png_struct* main, char const* message)noexcept{ - auto const& callable = static_cast(::png_get_error_ptr(main))->warning_callable_; - if(callable){ - callable(std::string_view(message)); - } - } - - static void write_data(::png_struct* png, ::png_byte* data, ::png_size_t length)noexcept try{ - auto& os = *static_cast(::png_get_io_ptr(png)); - os.write(reinterpret_cast(data), length); - if(!os.good()){ - error(png, "std::ostream::write() failed"); - } - }catch(std::exception const& error){ - error(png, error.what()); - }catch(...){ - error(png, "write_data: unknown error"); - } - - static void flush_data(png_struct* png)noexcept try{ - auto& os = *static_cast(::png_get_io_ptr(png)); - os.flush(); - if(!os.good()){ - error(png, "std::ostream::flush() failed"); - } - }catch(std::exception const& error){ - error(png, error.what()); - }catch(...){ - error(png, "flush_data: unknown error"); - } - - ::png_struct* main_ = nullptr; - ::png_info* info_ = nullptr; - - std::function error_callable_; - std::function warning_callable_; - }; - - template - bool write(bitmap const& image, std::ostream& os)noexcept{ - return writer{}.write(image, os); - } - - template - bool write(bitmap const& image, std::filesystem::path const& filepath){ - return writer{}.write(image, filepath); - } - - -} diff --git a/src/main.cpp b/src/main.cpp index 0ab4ca5..4e82b27 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,11 +1,4 @@ -#include "ply.hpp" -#include "image_format_png.hpp" - -#include "bitmap/bitmap.hpp" -#include "bitmap/binary_write.hpp" - #include - #include #include @@ -13,599 +6,18 @@ #include #include +#include "ply/ply.hpp" +#include "bitmap/io/image_format_png.hpp" +#include "bitmap/bitmap.hpp" +#include "bitmap/binary_write.hpp" +#include "ply2image/argparse.hpp" +#include "ply2image/tmrp.hpp" +#include "ply2image/cuda/tmrp.cuh" -namespace ply2image{ - - - using namespace std::literals; - - inline constexpr auto NaN = std::numeric_limits::quiet_NaN(); - - - enum class file_format{ - bbf = 0, - png = 1 - }; - - constexpr std::string_view file_format_strings[] = {"bbf"sv, "png"sv}; - - - enum class raster_filter{ - min = 0, - max = 1, - none = 2, - }; - - constexpr std::string_view raster_filter_strings[] = {"min"sv, "max"sv, "none"sv}; - - - std::string valid_values_string(std::ranges::output_range auto const& list){ - auto const begin = std::ranges::begin(list); - auto const end = std::ranges::end(list); - if(begin == end){ - throw std::logic_error("no valid values"); - } - - auto result = "(valid values: "s; - result += fmt::format("{:s}", std::quoted(*begin)); - for(auto const& entry: std::span(std::next(begin), end)){ - result += fmt::format(", {:s}", std::quoted(entry)); - } - result += ")"s; - return result; - } - - template - requires std::is_enum_v - T parse_enum_string(std::ranges::output_range auto const& list, std::string_view const value){ - auto const iter = std::ranges::find(list, value); - if(iter == std::ranges::end(list)){ - throw std::runtime_error( - fmt::format("invalid file format {:s} {:s}", value, valid_values_string(list))); - } - return static_cast(iter - std::ranges::begin(list)); - } - - - std::string trim_left(std::string_view const text, char const character){ - return {std::ranges::find_if(text, [=](char const c){ return c != character; }), text.end()}; - } - - - constexpr bool contains(auto list, auto value){ - return std::ranges::find(list, value) != list.end(); - } - - - template - std::optional get_raster( - argparse::ArgumentParser const& program, - std::string_view const arg_name, - std::string_view const disabled_name - ){ - if(program.get(disabled_name)){ - if(program.is_used(arg_name)){ - throw std::runtime_error( - "You cannot use " + std::string(arg_name) + " together with " + std::string(disabled_name)); - }else{ - return std::nullopt; - } - }else{ - return program.get(arg_name); - } - } - - template - std::int64_t raster_convert(T const v){ - using limits = std::numeric_limits; - if constexpr(ply::scalar_value)[[likely]]{ - if constexpr(std::is_floating_point_v){ - if(v != std::floor(v)){ - throw std::runtime_error("raster property contains at least one non-integer value"); - }else if(v < static_cast(limits::min()) || v > static_cast(limits::max())){ - throw std::runtime_error("raster property value is out of range"); - } - }else if constexpr(std::is_same_v){ - if(v > std::uint64_t(limits::max())){ - throw std::runtime_error("raster property value is out of range"); - } - } - - return static_cast(v); - }else{ - throw std::runtime_error("list type properties are not supported"); - } - } - - struct point{ - double x; - double y; - double v; - }; - - struct raster_point{ - double x; - double y; - double v; - std::int64_t rx; - std::int64_t ry; - - operator bmp::point()const{ - return {x, y}; - }; - }; - - template - struct raw_pixel; - - template <> - struct raw_pixel{ - double weight; - double value; - }; - - template <> - struct raw_pixel{ - double weight; - double value; - std::int64_t rx; - std::int64_t ry; - }; - - - struct max_value_filter{ - constexpr auto operator()(std::vector> const& p)const{ - return std::ranges::max_element(p, [](raw_pixel const& a, raw_pixel const& b){ - return a.value < b.value; - }); - } - }; - - struct min_value_filter{ - constexpr auto operator()(std::vector> const& p)const{ - return std::ranges::min_element(p, [](raw_pixel const& a, raw_pixel const& b){ - return a.value < b.value; - }); - } - }; - - struct none_filter{}; - - - bmp::bitmap>> to_vector_image( - std::size_t const width, - std::size_t const height, - std::vector const& points - ){ - bmp::bitmap>> vector_image(width, height); - for(auto const& p: points){ - auto const x = p.x; - auto const y = p.y; - auto const ix = static_cast(std::floor(x)); - auto const iy = static_cast(std::floor(y)); - auto const xr = x - std::floor(x); - auto const yr = y - std::floor(y); - if(ix < vector_image.w() && iy < vector_image.h()){ - auto const weight = (1.f - xr) * (1.f - yr); - vector_image(ix , iy ).push_back({weight, p.v}); - } - if(ix + 1 < vector_image.w() && iy < vector_image.h()){ - auto const weight = ( xr) * (1.f - yr); - vector_image(ix + 1, iy ).push_back({weight, p.v}); - } - if(ix < vector_image.w() && iy + 1 < vector_image.h()){ - auto const weight = (1.f - xr) * ( yr); - vector_image(ix , iy + 1).push_back({weight, p.v}); - } - if(ix + 1 < vector_image.w() && iy + 1 < vector_image.h()){ - auto const weight = ( xr) * ( yr); - vector_image(ix + 1, iy + 1).push_back({weight, p.v}); - } - } - return vector_image; - } - - struct raster_range{ - std::int64_t min_x; - std::int64_t max_x; - std::int64_t min_y; - std::int64_t max_y; - - std::size_t w()const{ - return static_cast(max_x + 1 - min_x); - } - - std::size_t h()const{ - return static_cast(max_y + 1 - min_y); - } - - std::size_t x(std::int64_t const x)const{ - return static_cast(x - min_x); - } - - std::size_t y(std::int64_t const y)const{ - return static_cast(y - min_y); - } - }; - - raster_range find_raster_range(std::vector const& points){ - using limits = std::numeric_limits; - raster_range range{limits::max(), limits::min(), limits::max(), limits::min()}; - for(auto const& p: points){ - range.min_x = std::min(range.min_x, p.rx); - range.max_x = std::max(range.max_x, p.rx); - range.min_y = std::min(range.min_y, p.ry); - range.max_y = std::max(range.max_y, p.ry); - } - return range; - } - - - constexpr double sqr(double const v)noexcept{ - return v * v; - } - - constexpr double distance(bmp::point const& a, bmp::point const& b){ - return std::sqrt(sqr(a.x() - b.x()) + sqr(a.y() - b.y())); - } - - constexpr double area(std::array, 3> const& t){ - auto const a = distance(t[0], t[1]); - auto const b = distance(t[1], t[2]); - auto const c = distance(t[2], t[0]); - auto const s = (a + b + c) / 2.; - return std::sqrt(s * (s - a) * (s - b) * (s - c)); - } - - constexpr bool is_inside(std::array const& t, bmp::point const& p){ - constexpr auto sign = - [](std::array, 3> const& t){ - return (t[0].x() - t[2].x()) * (t[1].y() - t[2].y()) - (t[1].x() - t[2].x()) * (t[0].y() - t[2].y()); - }; - - auto const d1 = sign({p, {t[0].x, t[0].y}, {t[1].x, t[1].y}}); - auto const d2 = sign({p, {t[1].x, t[1].y}, {t[2].x, t[2].y}}); - auto const d3 = sign({p, {t[2].x, t[2].y}, {t[0].x, t[0].y}}); - - auto const neg = (d1 < 0) || (d2 < 0) || (d3 < 0); - auto const pos = (d1 > 0) || (d2 > 0) || (d3 > 0); - - return !(neg && pos); - } - - class percent_printer{ - public: - struct lazy_incer{ - percent_printer& printer; - - ~lazy_incer(){ - ++printer; - } - }; - - percent_printer(std::size_t const label_width, std::string_view const base_line_label) - : label_width_(label_width) - { - if(base_line_label.size() > label_width_){ - throw std::logic_error("label width is larger then specified"); - } - - fmt::print("{:>{}s}: " - "====================================================================================================" - "\n", base_line_label, label_width_); - std::fflush(stdout); - } - - void init(std::string_view const label, std::size_t const count){ - if(label.size() > label_width_){ - throw std::logic_error("label width is larger then specified"); - } - - fmt::print("{:>{}s}: ", label, label_width_); - std::fflush(stdout); - - count_ = count; - prints_ = 0; - i_ = 0; - } - - lazy_incer lazy_inc(){ - return {*this}; - } - - private: - void operator++(){ - ++i_; - - if(count_ == 0){ - throw std::logic_error("percent printer used without init call"); - } - - if(i_ > count_){ - throw std::logic_error("percent printer run out of range"); - } - - auto const percent = - static_cast(std::ceil(static_cast(i_) / static_cast(count_) * 100.)); - - bool need_flush = false; - for(; prints_ < percent; ++prints_){ - fmt::print(">"); - need_flush = true; - } - - if(i_ == count_){ - fmt::print(" done\n"); - need_flush = true; - } - - if(need_flush){ - std::fflush(stdout); - } - } - - std::size_t label_width_ = 0; - std::size_t count_ = 0; - std::size_t prints_ = 0; - std::size_t i_ = 0; - }; - - template - bmp::bitmap>> to_vector_image( - std::size_t const width, - std::size_t const height, - std::vector const& points, - RasterFilter const& raster_filter - ){ - auto const range = find_raster_range(points); - if(range.w() < 2 || range.h() < 2){ - throw std::runtime_error("raster interpolation requires at least 2 columns and 2 rows"); - } - - fmt::print("raster with origin {:d}x{:d} and size {:d}x{:d}\n", - range.min_x, range.min_y, range.w(), range.h()); - - percent_printer progress(30, "base line"); - - bmp::bitmap> raster_image(range.w(), range.h()); - progress.init("create raster image", points.size()); - for(auto const& p: points){ - auto const printer = progress.lazy_inc(); - - auto& target_p = raster_image(range.x(p.rx), range.y(p.ry)); - if(target_p){ - throw std::runtime_error(fmt::format("raster point {:d}x{:d} exists twice", p.rx, p.ry)); - } - target_p = p; - } - - progress.init("raster interpolation", (raster_image.h() - 1) * (raster_image.w() - 1)); - bmp::bitmap>> vector_image(width, height); - for(std::size_t iy = 0; iy < raster_image.h() - 1; ++iy){ - for(std::size_t ix = 0; ix < raster_image.w() - 1; ++ix){ - auto const printer = progress.lazy_inc(); - - std::vector region; - region.reserve(4); - - if(auto const p = raster_image(ix, iy)){ - region.push_back(*p); - } - - if(auto const p = raster_image(ix + 1, iy)){ - region.push_back(*p); - } - - if(auto const p = raster_image(ix, iy + 1)){ - region.push_back(*p); - } - - if(auto const p = raster_image(ix + 1, iy + 1)){ - region.push_back(*p); - } - - if(region.size() < 3){ - continue; - } - - std::vector> triangles; - if(region.size() == 3){ - triangles.reserve(1); - triangles.push_back({region[0], region[1], region[2]}); - }else{ - triangles.reserve(4); - triangles.push_back({region[0], region[1], region[2]}); - triangles.push_back({region[1], region[2], region[3]}); - triangles.push_back({region[2], region[3], region[0]}); - triangles.push_back({region[3], region[0], region[1]}); - } - - for(auto const& t: triangles){ - // find integer bounting box around the floating point triangle within the target image - auto const fx = static_cast(std::clamp(static_cast(std::floor( - std::min({t[0].x, t[1].x, t[2].x}))), std::int64_t(0), static_cast(width - 1))); - auto const tx = static_cast(std::clamp(static_cast(std::ceil( - std::max({t[0].x, t[1].x, t[2].x}))), std::int64_t(0), static_cast(width - 1))); - if(tx == fx){ - continue; - } - - auto const fy = static_cast(std::clamp(static_cast(std::floor( - std::min({t[0].y, t[1].y, t[2].y}))), std::int64_t(0), static_cast(height - 1))); - auto const ty = static_cast(std::clamp(static_cast(std::ceil( - std::max({t[0].y, t[1].y, t[2].y}))), std::int64_t(0), static_cast(height - 1))); - if(ty == fy){ - continue; - } - - for(std::size_t y = fy; y <= ty; ++y){ - for(std::size_t x = fx; x <= tx; ++x){ - auto const p = bmp::point(static_cast(x), static_cast(y)); - if(!is_inside(t, p)){ - continue; - } - - std::array const areas{{ - area({p, t[1], t[2]}), - area({p, t[2], t[0]}), - area({p, t[0], t[1]}) - }}; - auto const area_sum = areas[0] + areas[1] + areas[2]; - std::array const weight{{ - areas[0] / area_sum, - areas[1] / area_sum, - areas[2] / area_sum - }}; - - auto const value = - t[0].v * weight[0] + - t[1].v * weight[1] + - t[2].v * weight[2]; - - auto const index = std::max({ - std::pair{weight[0], std::size_t(0)}, - std::pair{weight[1], std::size_t(1)}, - std::pair{weight[2], std::size_t(2)}}).second; - - vector_image(x, y).push_back({weight[index], value, t[index].rx, t[index].ry}); - } - } - } - } - } - - if constexpr(!std::same_as){ - // filter values via raster information - progress.init("reference filter", vector_image.point_count()); - for(auto& p: vector_image){ - auto const printer = progress.lazy_inc(); - - if(p.empty()){ - continue; - } - - auto const iter = raster_filter(p); - std::erase_if(p, - [ref_rx = iter->rx, ref_ry = iter->ry](raw_pixel const& v){ - return std::abs(ref_rx - v.rx) > 1 || std::abs(ref_ry - v.ry) > 1; - }); - - } - } - - return vector_image; - } - - template - bmp::bitmap to_image( - std::size_t const width, - std::size_t const height, - std::vector const& points, - RasterFilter const& ... raster_filter - ){ - using raw_pixel = ply2image::raw_pixel; - - auto const vector_image = to_vector_image(width, height, points, raster_filter ...); - - bmp::bitmap image(width, height, NaN); - std::ranges::transform(vector_image, image.begin(), - [](std::vector const& data){ - if(data.empty()){ - return NaN; - }else [[likely]]{ - if(data.size() == 1){ - return data[0].value; - } - - auto const sum_weight = std::transform_reduce(data.begin(), data.end(), 0., std::plus{}, - [](raw_pixel const& v){ - if(v.weight < 0.){ - throw std::logic_error("negative weight"); - } - return v.weight; - }); - if(sum_weight == 0.){ - return NaN; - } - - auto const value = std::transform_reduce(data.begin(), data.end(), 0., std::plus{}, - [](raw_pixel const& v){ - return v.value * v.weight; - }); - return value / sum_weight; - } - }); - - return image; - } - - -} - - -void print_help(argparse::ArgumentParser const& program, std::string_view const program_name){ - fmt::print(fmt::emphasis::bold, "{:s}", program_name); - fmt::print("\n\n" - "This program converts 3D point clouds in PLY file format to 2D image data in BBF or PNG file format.\n" - "\n" - "Links for Group for Quality Assurance and Industrial Image Processing in the Department of Mechanical " - "Engineering:\n" - "\n" - " - Project page with result examples image\n" - " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/software/ply2image\n" - " - PLY file format and how we use it\n" - " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/compendia/project-structure/-/blob/master/doc/doc-3d-file-" - "formats.md\n" - " - BBF file format specification\n" - " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/compendia/bbf-file-format\n" - " - PNG file format\n" - " https://en.wikipedia.org/wiki/Portable_Network_Graphics\n" - "\n" - "For this, two of the PLY properties are interpreted as x and y pixel coordinates for the 2D image. " - "A third PLY property is interpreted as the value of this pixel.\n" - "\n" - "By default, the x, y and z properties of the vertex element are used. This corresponds exactly to the " - "conversion of a 3D point cloud into a depth map.\n" - "\n" - "The values of the properties can be scaled. Before and after scaling, the values can be moved. The shift " - "before scaling takes place in the unit of the property. The shift after scaling takes place (for x and y) " - "in 2D pixels. The shift before and after scaling is of course equivalent via the scaling factor. That both " - "are offered is purely a convenience function.\n" - "\n" - "Since the 3D coordinates X and Y are usually not integers, in the 2D image the Z value must be distributed " - "among the surrounding four 2D pixels. If neighboring 3D X/Y coordinates are further than one unit apart, " - "then there will be gaps between these pixels in the 2D image. With almost all 3D measurement methods, 2D " - "neighborhood information of the 3D coordinates can simultaneously be acquired. It is strongly recommended " - "to always save them with the PLY file and to keep them even in case of global transformations of the 3D " - "points. If this information is available in x and y direction as a property of the PLY file, it can be used " - "to perform a dense interpolation between the 2D pixels that were adjacent in 3D. This results in gaps in the " - "2D image only if the original measurement of the 3D data had also detected a gap. The 2D raster must contain " - "integer values only. By default it is assumed to be specified in the PLY properties raster_x and raster_y. " - "If one of these properties is not found in the PLY file, the program prints a warning and performs the " - "conversion without raster interpolation. Raster interpolation can be switched off explicitly.\n" - "\n" - "The raster information can also be used to cleanly separate foreground and background. This is especially " - "useful for point clouds that have been transformed, as overlaps are very likely to occur. In marginal areas, " - "however, this may already be the case without transformation. For filtering, the minimum or maximum value is " - "determined as a reference value in the target pixel. Only values that are adjacent to this reference value " - "in the raster are included in the target pixel. By default, the minimum is used, which corresponds to a " - "foreground selection for Z values. (The smaller the value, the closer the pixel was to the acquisition " - "system)\n" - "\n" - "By default, the output image is stored in BBF file format with 64-bit floating point values in the native " - "byte order of the program's current execution environment. Empty pixels are encoded as NaN (Not a Number). " - "The BBF specification is linked above. It is a simple raw data format with a 24 bytes header.\n" - "\n" - "Saving as PNG is lossy! The output is always a 16 bit grayscale image with alpha channel. The pixel values " - "range is truncated to 0 to 65535, no overflow or underflow takes place! All pixel values are rounded half up " - "to integers. Fixed point values can be emulated via the value scaling. For example, to emulate 4 binary " - "decimal places, the scaling must be set to 16 (=2^4). However, this information is not stored in the image! " - "So when reading the PNG file later, you have to take care by yourself to interpret the values as fixed-point " - "numbers again!\n" - "\n"); - - fmt::print("{:s}\n", program.help().str()); -} +template +struct FirstType { + using type = T; +}; int main(int argc, char** argv)try{ using namespace std::literals; @@ -691,41 +103,46 @@ int main(int argc, char** argv)try{ program.add_argument("--x-scale") .help("all x values are multiplied by x-scale") .scan<'g', double>() - .default_value(1.); + .default_value(static_cast(1.)); program.add_argument("--y-scale") .help("all y values are multiplied by y-scale") .scan<'g', double>() - .default_value(1.); + .default_value(static_cast(1.)); program.add_argument("--value-scale") .help("all pixel values are multiplied by value-scale") .scan<'g', double>() - .default_value(1.); + .default_value(static_cast(1.)); program.add_argument("--x-pre-scale-offset") .help("all x values are added with x-pre-scale-offset before scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); program.add_argument("--y-pre-scale-offset") .help("all y values are added with y-pre-scale-offset before scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); program.add_argument("--value-pre-scale-offset") .help("all pixel values are added with value-pre-scale-offset before scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); program.add_argument("--x-post-scale-offset") .help("all x values are added with x-post-scale-offset after scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); program.add_argument("--y-post-scale-offset") .help("all y values are added with y-post-scale-offset after scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); program.add_argument("--value-post-scale-offset") .help("all pixel values are added with value-post-scale-offset after scaling") .scan<'g', double>() - .default_value(0.); + .default_value(static_cast(0.)); + program.add_argument("--cuda") + .help("use CUDA for tmrp interpolation") + .implicit_value(true) + .default_value(false); + try{ program.parse_args(argc, argv); @@ -746,8 +163,8 @@ int main(int argc, char** argv)try{ ext != format ){ throw std::runtime_error(fmt::format( - "file extension of output file {:s} is different from specified output format {:s}", - std::quoted(ext), std::quoted(format))); + "file extension of output file {:?} is different from specified output format {:?}", + ext, format)); } auto const width = program.get("-w"); @@ -785,6 +202,8 @@ int main(int argc, char** argv)try{ auto const y_post_scale = program.get("--y-post-scale-offset"); auto const v_post_scale = program.get("--value-post-scale-offset"); + auto const cuda_enabled = program.get("--cuda"); + // load file ply::ply data; data.load(input_filepath); @@ -815,8 +234,8 @@ int main(int argc, char** argv)try{ auto const element_count_width = fmt::format("{:d}", element_count).size(); for(std::size_t i = 0; i < element_count; ++i){ auto const element_name = data.element_name(i); - fmt::print("element {:>{}d} {:s} with {:d} values\n", i, element_count_width, - std::quoted(element_name), data.value_count(i)); + fmt::print("element {:>{}d} {:?} with {:d} values\n", i, element_count_width, + element_name, data.value_count(i)); { auto names = data.property_names(i); @@ -838,8 +257,8 @@ int main(int argc, char** argv)try{ std::erase(used_properties, test); } fmt::print(used ? fmt::emphasis::bold : fmt::emphasis(), - " property {:>{}d} {:s} with type {:s}\n", j, property_count_width, - std::quoted(property_name), data.property_type_name(i, j)); + " property {:>{}d} {:?} with type {:s}\n", j, property_count_width, + property_name, data.property_type_name(i, j)); } } } @@ -890,12 +309,13 @@ int main(int argc, char** argv)try{ throw std::runtime_error("value count is 0"); } + auto const image_convert = - [&](std::type_identity, auto const& ... raster_filter){ + [&](std::type_identity, RasterFilter const& ... raster_filter){ // extract used data std::vector points(count); auto const convert = [&points](auto const& setter, std::span const list){ - if constexpr(ply::scalar_value)[[likely]]{ + if constexpr(ply::scalar_value){ for(std::size_t i = 0; i < points.size(); ++i){ setter(points[i], list[i]); } @@ -924,6 +344,18 @@ int main(int argc, char** argv)try{ std::visit([=](auto const& v){ convert(set_ry, v); }, data.values(*yr_element, *yr_property)); } + if constexpr (std::is_same_v) { + if (cuda_enabled) { + if constexpr (std::is_same_v::type, min_value_filter>) { + return tmrpcuda::to_image_filter_min(width, height, points); + } else if constexpr (std::is_same_v::type, max_value_filter>) { + return tmrpcuda::to_image_filter_max(width, height, points); + } else if constexpr (std::is_same_v::type, none_filter>) { + return tmrpcuda::to_image_filter_none(width, height, points); + } + } + } + // convert list to image return to_image(width, height, points, raster_filter ...); }; @@ -958,7 +390,7 @@ int main(int argc, char** argv)try{ return bmp::pixel::masked_g16u{.v = {}, .m = true}; }else{ return bmp::pixel::masked_g16u{ - .v = static_cast(std::round(std::clamp(v, 0., 65535.))), .m = false}; + .v = static_cast(std::round(std::clamp(v, static_cast(0.), static_cast(65535.)))), .m = false}; } }); diff --git a/src/ply.hpp b/src/ply/ply.hpp similarity index 99% rename from src/ply.hpp rename to src/ply/ply.hpp index 5e0e31a..2782a3f 100644 --- a/src/ply.hpp +++ b/src/ply/ply.hpp @@ -120,7 +120,7 @@ namespace ply{ return "std::size_t"sv; } }(); - throw std::runtime_error(fmt::format("Can not convert {:s} to {:s}", std::quoted(text), type_string)); + throw std::runtime_error(fmt::format("Can not convert {:?} to {:s}", text, type_string)); } template @@ -470,7 +470,7 @@ namespace ply{ auto const iter = find_property(name); if(iter == properties_.end()){ throw std::runtime_error(fmt::format( - "PLY element {:s} contains no property {:s}", std::quoted(name_), std::quoted(name))); + "PLY element {:?} contains no property {:?}", name_, name)); } return std::size_t(iter - properties_.begin()); } @@ -503,8 +503,8 @@ namespace ply{ auto const values = std::get_if(properties_[index]); if(values == nullptr){ throw std::runtime_error(fmt::format( - "PLY element {:s} property {:s} accessed as {:s} but its type is {:s}", - std::quoted(name_), std::quoted(property_name(index)), as_string(), + "PLY element {:?} property {:?} accessed as {:s} but its type is {:s}", + name_, property_name(index), as_string(), property_type_name(index))); } } @@ -584,7 +584,7 @@ namespace ply{ std::size_t element_index(std::string_view const name)const{ auto const iter = find_element(name); if(iter == elements_.end()){ - throw std::runtime_error(fmt::format("PLY contains no element {:s}", std::quoted(name))); + throw std::runtime_error(fmt::format("PLY contains no element {:?}", name)); } return std::size_t(iter - elements_.begin()); } diff --git a/src/text.hpp b/src/ply/text.hpp similarity index 100% rename from src/text.hpp rename to src/ply/text.hpp diff --git a/src/ply2image/argparse.cpp b/src/ply2image/argparse.cpp new file mode 100644 index 0000000..9d66e60 --- /dev/null +++ b/src/ply2image/argparse.cpp @@ -0,0 +1,75 @@ +#include "argparse.hpp" + +namespace ply2image{ + +using namespace std::literals; + +std::string trim_left(std::string_view const text, char const character){ + return {std::ranges::find_if(text, [=](char const c){ return c != character; }), text.end()}; +} + +} // namespace ply2image + +void print_help(argparse::ArgumentParser const& program, std::string_view const program_name){ + fmt::print(fmt::emphasis::bold, "{:s}", program_name); + fmt::print("\n\n" + "This program converts 3D point clouds in PLY file format to 2D image data in BBF or PNG file format.\n" + "\n" + "Links for Group for Quality Assurance and Industrial Image Processing in the Department of Mechanical " + "Engineering:\n" + "\n" + " - Project page with result examples image\n" + " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/software/ply2image\n" + " - PLY file format and how we use it\n" + " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/compendia/project-structure/-/blob/master/doc/doc-3d-file-" + "formats.md\n" + " - BBF file format specification\n" + " https://gitlab.tu-ilmenau.de/FakMB/QBV/topics/compendia/bbf-file-format\n" + " - PNG file format\n" + " https://en.wikipedia.org/wiki/Portable_Network_Graphics\n" + "\n" + "For this, two of the PLY properties are interpreted as x and y pixel coordinates for the 2D image. " + "A third PLY property is interpreted as the value of this pixel.\n" + "\n" + "By default, the x, y and z properties of the vertex element are used. This corresponds exactly to the " + "conversion of a 3D point cloud into a depth map.\n" + "\n" + "The values of the properties can be scaled. Before and after scaling, the values can be moved. The shift " + "before scaling takes place in the unit of the property. The shift after scaling takes place (for x and y) " + "in 2D pixels. The shift before and after scaling is of course equivalent via the scaling factor. That both " + "are offered is purely a convenience function.\n" + "\n" + "Since the 3D coordinates X and Y are usually not integers, in the 2D image the Z value must be distributed " + "among the surrounding four 2D pixels. If neighboring 3D X/Y coordinates are further than one unit apart, " + "then there will be gaps between these pixels in the 2D image. With almost all 3D measurement methods, 2D " + "neighborhood information of the 3D coordinates can simultaneously be acquired. It is strongly recommended " + "to always save them with the PLY file and to keep them even in case of global transformations of the 3D " + "points. If this information is available in x and y direction as a property of the PLY file, it can be used " + "to perform a dense interpolation between the 2D pixels that were adjacent in 3D. This results in gaps in the " + "2D image only if the original measurement of the 3D data had also detected a gap. The 2D raster must contain " + "integer values only. By default it is assumed to be specified in the PLY properties raster_x and raster_y. " + "If one of these properties is not found in the PLY file, the program prints a warning and performs the " + "conversion without raster interpolation. Raster interpolation can be switched off explicitly.\n" + "\n" + "The raster information can also be used to cleanly separate foreground and background. This is especially " + "useful for point clouds that have been transformed, as overlaps are very likely to occur. In marginal areas, " + "however, this may already be the case without transformation. For filtering, the minimum or maximum value is " + "determined as a reference value in the target pixel. Only values that are adjacent to this reference value " + "in the raster are included in the target pixel. By default, the minimum is used, which corresponds to a " + "foreground selection for Z values. (The smaller the value, the closer the pixel was to the acquisition " + "system)\n" + "\n" + "By default, the output image is stored in BBF file format with 64-bit floating point values in the native " + "byte order of the program's current execution environment. Empty pixels are encoded as NaN (Not a Number). " + "The BBF specification is linked above. It is a simple raw data format with a 24 bytes header.\n" + "\n" + "Saving as PNG is lossy! The output is always a 16 bit grayscale image with alpha channel. The pixel values " + "range is truncated to 0 to 65535, no overflow or underflow takes place! All pixel values are rounded half up " + "to integers. Fixed point values can be emulated via the value scaling. For example, to emulate 4 binary " + "decimal places, the scaling must be set to 16 (=2^4). However, this information is not stored in the image! " + "So when reading the PNG file later, you have to take care by yourself to interpret the values as fixed-point " + "numbers again!\n" + "\n"); + + fmt::print("{:s}\n", program.help().str()); +} \ No newline at end of file diff --git a/src/ply2image/argparse.hpp b/src/ply2image/argparse.hpp new file mode 100644 index 0000000..c70a261 --- /dev/null +++ b/src/ply2image/argparse.hpp @@ -0,0 +1,81 @@ +#pragma once + +#include +#include + +#include +#include + +namespace ply2image{ + +using namespace std::literals; + +enum class file_format{ + bbf = 0, + png = 1 +}; + +constexpr std::string_view file_format_strings[] = {"bbf"sv, "png"sv}; + +enum class raster_filter{ + min = 0, + max = 1, + none = 2, +}; + +constexpr std::string_view raster_filter_strings[] = {"min"sv, "max"sv, "none"sv}; + +std::string valid_values_string(std::ranges::output_range auto const& list){ + auto const begin = std::ranges::begin(list); + auto const end = std::ranges::end(list); + if(begin == end){ + throw std::logic_error("no valid values"); + } + + auto result = "(valid values: "s; + result += fmt::format("{:?}", *begin); + for(auto const& entry: std::span(std::next(begin), end)){ + result += fmt::format(", {:?}", entry); + } + result += ")"s; + return result; +} + +std::string trim_left(std::string_view const text, char const character); + +constexpr bool contains(auto list, auto value){ + return std::ranges::find(list, value) != list.end(); +} + +template +requires std::is_enum_v +T parse_enum_string(std::ranges::output_range auto const& list, std::string_view const value){ + auto const iter = std::ranges::find(list, value); + if(iter == std::ranges::end(list)){ + throw std::runtime_error( + fmt::format("invalid file format {:s} {:s}", value, valid_values_string(list))); + } + return static_cast(iter - std::ranges::begin(list)); +} + +template +std::optional get_raster( + argparse::ArgumentParser const& program, + std::string_view const arg_name, + std::string_view const disabled_name +){ + if(program.get(disabled_name)){ + if(program.is_used(arg_name)){ + throw std::runtime_error( + "You cannot use " + std::string(arg_name) + " together with " + std::string(disabled_name)); + }else{ + return std::nullopt; + } + }else{ + return program.get(arg_name); + } +} + +} // namespace ply2image + +void print_help(argparse::ArgumentParser const& program, std::string_view const program_name); \ No newline at end of file diff --git a/src/ply2image/cuda/tmrp.cu b/src/ply2image/cuda/tmrp.cu new file mode 100644 index 0000000..8270907 --- /dev/null +++ b/src/ply2image/cuda/tmrp.cu @@ -0,0 +1,990 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tmrp.cuh" +#include "../progress_bar.hpp" + +namespace ply2image { +namespace tmrpcuda { + +#define checkCudaErrors(call) \ +do { \ + cudaError_t err = call; \ + if (err != cudaSuccess) { \ + printf("CUDA error at %s %d: %s\n", __FILE__, __LINE__, \ + cudaGetErrorString(err)); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) + +struct filter_min {}; +struct filter_max {}; +struct filter_none {}; + +template +concept RasterFilterNone = std::is_same_v; + +template +concept RasterFilterMinMax = + std::is_same_v || + std::is_same_v; + +template +concept RasterFilter = RasterFilterMinMax || RasterFilterNone; + +template +struct pixel_array { + constexpr static int preallocated_v = preallocated; + + struct pixel_vector_header { + int allocated_size; + pixel_vector_header* next_header; + + // fetches from local array + // index always starts at 0 + __device__ inline T* get(int index) { + return reinterpret_cast(reinterpret_cast(this) + sizeof(*this)) + index; + } + }; + + struct pixel_array_header { + cuda::binary_semaphore lock; + pixel_vector_header* next_header; + int last; + + __host__ __device__ pixel_array_header() : + lock(1), + next_header(nullptr), + last(0) { + // pass + } + }; + + int width; + pixel_array_header* header_array; + T* data_array; + + __host__ __device__ pixel_array(int width) : + width(width), + header_array(nullptr), + data_array(nullptr) { + // pass + } + + __host__ __device__ constexpr int get_preallocated() { + return preallocated; + } + + __device__ void init_header(int x, int y) { + new(&(header_array[y*width + x])) pixel_array_header(); + } + + // No bounds checking! + __device__ void push_back(int x, int y, const T &value) { + pixel_array_header* cur_array_header = &(header_array[y*width + x]); + int index = atomicAdd(&(cur_array_header->last), 1); + + if (index < preallocated) { + data_array[(y*width + x) * preallocated + index] = value; + } else { + if (cur_array_header->next_header == nullptr) { + allocate_arraynode(cur_array_header); + } + pixel_vector_header* cur_header = cur_array_header->next_header; + index = index - preallocated; + while (index >= cur_header->allocated_size) { + if (cur_header->next_header == nullptr) { + allocate_arraynode(cur_array_header, cur_header); + } + index = index - cur_header->allocated_size; + cur_header = cur_header->next_header; + } + T* sink = cur_header->get(index); + *sink = value; + } + } + +private: + __device__ void allocate_arraynode(pixel_array_header* array_header) { + array_header->lock.acquire(); + if (array_header->next_header == nullptr) { + pixel_vector_header* new_vectornode = static_cast(malloc(preallocated * sizeof(T) + sizeof(pixel_vector_header))); + + new_vectornode->allocated_size = preallocated; + new_vectornode->next_header = nullptr; + + array_header->next_header = new_vectornode; + __threadfence_block(); + } + array_header->lock.release(); + } + + __device__ void allocate_arraynode(pixel_array_header* array_header, pixel_vector_header* vector_header) { + array_header->lock.acquire(); + if (vector_header->next_header == nullptr) { + int new_size = vector_header->allocated_size*2; + pixel_vector_header* new_vectornode = static_cast(malloc(new_size * sizeof(T) + sizeof(pixel_vector_header))); + + new_vectornode->allocated_size = new_size; + new_vectornode->next_header = nullptr; + + vector_header->next_header = new_vectornode; + __threadfence_block(); + } + array_header->lock.release(); + } +}; + +struct interpolation_result_min_max { + double value_weighted; + double weight; + int rx; + int ry; +}; + +// this raster_point is comparable to an optional +// if v equals infinity (0x7FF0_0000_0000_0000), the raster_point is non existent in the raster_image +struct raster_point { + double x; + double y; + double v; + + __host__ __device__ bool exists() { + return v != cuda::std::numeric_limits::infinity(); + } + + __host__ __device__ raster_point(double x, double y, double v) : + x(x), + y(y), + v(v) { + // pass + } + + __host__ __device__ static inline raster_point empty() { + return raster_point(0, 0, cuda::std::numeric_limits::infinity()); + } + + // warning: this default constructor is intended for the use within shared memory in cuda + raster_point() = default; + + __host__ __device__ raster_point& operator=(const ply2image::raster_point ©) { + x = copy.x; + y = copy.y; + v = copy.v; + return *this; + } +}; + +template +struct r_triangle_parent { + using shared_rpoints_t = cuda::std::array, blockSizeY+1>; + const shared_rpoints_t &shared_rpoints; + + int ry1, rx1; + int ry2, rx2; + int ry3, rx3; + + __device__ r_triangle_parent( + const shared_rpoints_t &shared_rpoints, + int ry1, int rx1, + int ry2, int rx2, + int ry3, int rx3, + int, int) : // keep the argument list the same for all child structs + shared_rpoints(shared_rpoints), + ry1(ry1), rx1(rx1), + ry2(ry2), rx2(rx2), + ry3(ry3), rx3(rx3) { + // pass + } + + __device__ inline const raster_point* p1() const { + return &(shared_rpoints[ry1][rx1]); + } + + __device__ inline const raster_point* p2() const { + return &(shared_rpoints[ry2][rx2]); + } + + __device__ inline const raster_point* p3() const { + return &(shared_rpoints[ry3][rx3]); + } +}; + +template +struct r_triangle {}; + +template +struct r_triangle : r_triangle_parent { + // base offset used in global memory, in contrast to relative offset in shared memory + int base_ry, base_rx; + + __device__ r_triangle( + const r_triangle_parent::shared_rpoints_t &shared_rpoints, + int ry1, int rx1, + int ry2, int rx2, + int ry3, int rx3, + int base_ry, int base_rx) : // keep the argument list the same for all child structs + r_triangle_parent( + shared_rpoints, + ry1, rx1, + ry2, rx2, + ry3, rx3, + base_ry, base_rx), + base_ry(base_ry), + base_rx(base_rx) { + // pass + } + + __device__ int absolute_ry1() const { + return base_ry + this->ry1; + } + + __device__ int absolute_rx1() const { + return base_rx + this->rx1; + } + + __device__ int absolute_ry2() const { + return base_rx + this->ry2; + } + + __device__ int absolute_rx2() const { + return base_rx + this->rx2; + } + + __device__ int absolute_ry3() const { + return base_rx + this->ry3; + } + + __device__ int absolute_rx3() const { + return base_rx + this->rx3; + } +}; + +template +struct r_triangle : r_triangle_parent { + using r_triangle_parent::r_triangle_parent; +}; + +template +struct raster_interpolation_config_parent { + pixels_t pixels; + int rwidth; + int rheight; + int vwidth; + int vheight; + + __host__ __device__ raster_interpolation_config_parent( + int rwidth, + int rheight, + int vwidth, + int vheight) : + pixels(vwidth), + rwidth(rwidth), + rheight(rheight), + vwidth(vwidth), + vheight(vheight) { + // pass + } +}; + +struct interpolation_results_filter_none { + int width; + double* average_values; + double* average_weight_sum; + + __device__ interpolation_results_filter_none(int width) : + width(width), + average_values(nullptr), + average_weight_sum(nullptr) { + // pass + }; + + // warning: no bounds checking + __device__ void add_result_reduce(int x, int y, double value_weighted, double weight) { + const int index = y*width + x; + average_values[index] += value_weighted * weight; + average_weight_sum[index] += weight; + } + +}; + +template +struct raster_interpolation_config {}; + +template +struct raster_interpolation_config : raster_interpolation_config_parent> { + using raster_interpolation_config_parent>::raster_interpolation_config_parent; +}; + +template +struct raster_interpolation_config : raster_interpolation_config_parent { + using raster_interpolation_config_parent::raster_interpolation_config_parent; +}; + +template +__global__ void init_pixel_array_header(raster_interpolation_config ri_conf) { + const int idx = blockIdx.x*blockDim.x + threadIdx.x; + const int idy = blockIdx.y*blockDim.y + threadIdx.y; + + // initialize *(ri_conf.pixels.header_array) in a grid stride loop + for (int vy = idy; vy < ri_conf.vheight; vy += blockDim.y * gridDim.y) { + for (int vx = idx; vx < ri_conf.vwidth; vx += blockDim.x * gridDim.x) { + ri_conf.pixels.init_header(vx, vy); + } + } +} + +template +__global__ void raster_interpolation( + raster_interpolation_config ri_conf, + const raster_point* __restrict__ raster_image) { + __shared__ cuda::std::array, blockSizeY+1> shared_rpoints; + + const int idx = blockIdx.x * blockSizeX + threadIdx.x; + const int idy = blockIdx.y * blockSizeY + threadIdx.y; + const int idb = threadIdx.y * blockSizeX + threadIdx.x; + + const int x_stride = blockSizeX * gridDim.x; + const int y_stride = blockSizeY * gridDim.y; + + const int rx_leftover_pre = ri_conf.rwidth % x_stride; + const int rx_leftover = cuda::std::min(ri_conf.rwidth, (rx_leftover_pre == 0 ? x_stride : rx_leftover_pre)); + const int rx_max = ri_conf.rwidth - rx_leftover; + + const int ry_leftover_pre = ri_conf.rheight % y_stride; + const int ry_leftover = cuda::std::min(ri_conf.rheight, (ry_leftover_pre == 0 ? y_stride : ry_leftover_pre)); + const int ry_max = ri_conf.rheight - ry_leftover; + + const int extra_x = + idb < blockSizeY ? + blockSizeX : ((idb < blockSizeX + blockSizeY + 1) ? + (idb - blockSizeY) : (-1)); + const int extra_y = + idb < blockSizeY ? + idb : (idb < blockSizeX + blockSizeY + 1 ? + blockSizeY : (-1)); + + int ry; + const bool full_block_x = rx_leftover > blockSizeX * (blockIdx.x + 1); + const int idb_limit_max_x_pre = rx_leftover % blockSizeX; + const int idb_limit_max_x = blockSizeY + (idb_limit_max_x_pre == 0 ? blockSizeX : idb_limit_max_x_pre); + for (ry = idy; ry < ry_max; ry += y_stride) { + const raster_point* pitched_raster_points = raster_image + ry*ri_conf.rwidth; + int rx; + const int extra_current = (ry - threadIdx.y + extra_y) * ri_conf.rwidth - threadIdx.x + extra_x; + for (rx = idx; rx < rx_max; rx += x_stride) { + shared_rpoints[threadIdx.y][threadIdx.x] = pitched_raster_points[rx]; + if (idb < blockSizeY + blockSizeX + 1) { + shared_rpoints[extra_y][extra_x] = raster_image[extra_current + rx]; + } + + __syncthreads(); + create_triangles(shared_rpoints, ri_conf, rx, ry); + __syncthreads(); + } + + if (rx < ri_conf.rwidth && rx_leftover > blockSizeX * blockIdx.x + 1) { + shared_rpoints[threadIdx.y][threadIdx.x] = pitched_raster_points[rx]; + } + if (full_block_x && idb < blockSizeX + blockSizeY + 1 || !full_block_x && idb >= blockSizeY && idb < idb_limit_max_x) { + shared_rpoints[extra_y][extra_x] = raster_image[extra_current + rx]; + } + __syncthreads(); + if (rx < ri_conf.rwidth - 1) { + create_triangles(shared_rpoints, ri_conf, rx, ry); + } + __syncthreads(); + } + const raster_point* pitched_raster_points = raster_image + ry*ri_conf.rwidth; + const int extra_current = (ry - threadIdx.y + extra_y) * ri_conf.rwidth - threadIdx.x + extra_x; + const bool full_block_y = ry_leftover > blockSizeY * (blockIdx.y + 1); + const int idb_limit_max_y_pre = ry_leftover % blockSizeY; + const int idb_limit_max_y = (idb_limit_max_y_pre == 0 ? blockSizeY : idb_limit_max_y_pre); + int rx; + for (rx = idx; rx < rx_max; rx += x_stride) { + if (ry < ri_conf.rheight && ry_leftover > blockSizeY * blockIdx.y + 1) { + shared_rpoints[threadIdx.y][threadIdx.x] = pitched_raster_points[rx]; + if (full_block_y && idb < blockSizeX + blockSizeY + 1 || !full_block_y && idb < idb_limit_max_y) { + shared_rpoints[extra_y][extra_x] = raster_image[extra_current + rx]; + } + } + __syncthreads(); + if (ry < ri_conf.rheight - 1) { + create_triangles(shared_rpoints, ri_conf, rx, ry); + } + __syncthreads(); + } + + if (rx < ri_conf.rwidth && ry < ri_conf.rheight) { + shared_rpoints[threadIdx.y][threadIdx.x] = pitched_raster_points[rx]; + } + if (full_block_x && full_block_y && idb < blockSizeX + blockSizeY + 1 || full_block_y && idb >= blockSizeY && idb < idb_limit_max_x || full_block_x && idb < idb_limit_max_y) { + shared_rpoints[extra_y][extra_x] = raster_image[extra_current + rx]; + } + __syncthreads(); + if (rx < ri_conf.rwidth - 1 && ry < ri_conf.rheight - 1) { + create_triangles(shared_rpoints, ri_conf, rx, ry); + } + __syncthreads(); +} + +template +__device__ void create_triangles( + cuda::std::array, blockSizeY+1> &shared_rpoints, + raster_interpolation_config &ri_conf, + int rx, + int ry) { + + // either 0, 1 or 4 triangles are found + int rpoint_count = 0; + int non_existing_tri = 0; + + if (shared_rpoints[threadIdx.y][threadIdx.x].exists()) { + rpoint_count++; + } // else non_existing_tri = 0; + if (shared_rpoints[threadIdx.y][threadIdx.x+1].exists()) { + rpoint_count++; + } else { + non_existing_tri = 1; + } + if (shared_rpoints[threadIdx.y+1][threadIdx.x].exists()) { + rpoint_count++; + } else { + non_existing_tri = 2; + } + if (shared_rpoints[threadIdx.y+1][threadIdx.x+1].exists()) { + rpoint_count++; + } else { + non_existing_tri = 3; + } + + // atleast one triangle + if (rpoint_count >= 3) { + // use binary logic to determine triangles + char tri_x = static_cast(non_existing_tri >> 1); + char not_tri_x = tri_x^0x01; + char tri_y = static_cast(non_existing_tri & 0x01); + char not_tri_y = tri_y^0x01; + + // triangle 1 + r_triangle tri_1( + shared_rpoints, + threadIdx.y, threadIdx.x+(not_tri_x¬_tri_y), // px1, py1 + threadIdx.y + not_tri_x, threadIdx.x+tri_x, // px2, py2 + threadIdx.y + 1, threadIdx.x+(not_tri_x|not_tri_y), // px3, py3 + ry, rx); + process_triangle( + tri_1, + ri_conf); + + // 4 triangles total + if (rpoint_count == 4) { + // triangles 2-4 + r_triangle tri_2( + shared_rpoints, + threadIdx.y, threadIdx.x + (tri_x&tri_y), + threadIdx.y + 1-(tri_x^tri_y), threadIdx.x + (tri_x^tri_y), + threadIdx.y + 1, threadIdx.x + (not_tri_x|tri_y), + ry, rx); + process_triangle( + tri_2, + ri_conf); + r_triangle tri_3( + shared_rpoints, + threadIdx.y, threadIdx.x + (tri_x¬_tri_y), + threadIdx.y + tri_x, threadIdx.x + not_tri_x, + threadIdx.y + 1, threadIdx.x + (tri_x|not_tri_y), + ry, rx); + process_triangle( + tri_3, + ri_conf); + r_triangle tri_4( + shared_rpoints, + threadIdx.y, threadIdx.x + (not_tri_x&tri_y), + threadIdx.y + (tri_x^tri_y), threadIdx.x + 1-(tri_x^tri_y), + threadIdx.y + 1, threadIdx.x + (tri_x|tri_y), + ry, rx); + process_triangle( + tri_4, + ri_conf); + } + } +} + +template +__device__ void process_triangle(const r_triangle &tri, raster_interpolation_config &ri_conf) { + // bounding box + const int fx = cuda::std::clamp( + static_cast(cuda::std::floor(cuda::std::min({tri.p1()->x, tri.p2()->x, tri.p3()->x}))), + 0, + ri_conf.vwidth - 1); + const int tx = cuda::std::clamp( + static_cast(cuda::std::ceil(cuda::std::max({tri.p1()->x, tri.p2()->x, tri.p3()->x}))), + 0, + ri_conf.vwidth - 1); + if (fx == tx) { + return; + } + const int fy = cuda::std::clamp( + static_cast(cuda::std::floor(cuda::std::min({tri.p1()->y, tri.p2()->y, tri.p3()->y}))), + 0, + ri_conf.vheight - 1); + const int ty = cuda::std::clamp( + static_cast(cuda::std::ceil(cuda::std::max({tri.p1()->y, tri.p2()->y, tri.p3()->y}))), + 0, + ri_conf.vheight - 1); + if (fy == ty) { + return; + } + + int hits = 0; // bitwise counting, 32 entries in 32-bit int + int count = 0; + constexpr int hits_bitsize = sizeof(hits)*8; + // const int final_count = (tx - fx) * (ty - fy); + uint32_t base = 0; + for (int vy = fy; vy <= ty; vy++) { + for (int vx = fx; vx <= tx; vx++) { + hits = hits << 1; + if (in_tri(tri, vx, vy)) { + hits++; + } + count++; + if (count >= hits_bitsize) { + multiple_tri_interpolations(tri, ri_conf, fx, tx, fy, ty, hits, base); + base += hits_bitsize; + count = 0; + hits = 0; + } + } + } + multiple_tri_interpolations(tri, ri_conf, fx, tx, fy, ty, hits<<(32-count), base); +} + +template +__device__ bool in_tri(const r_triangle &tri, int vx, int vy) { + auto sign = []( + const double p1x, const double p1y, + const double p2x, const double p2y, + const double p3x, const double p3y) -> double { + return (p1x - p3x) * (p2y - p3y) - (p2x - p3x) * (p1y - p3y); + }; + + const double d1 = sign(vx, vy, tri.p1()->x, tri.p1()->y, tri.p2()->x, tri.p2()->y); + const double d2 = sign(vx, vy, tri.p2()->x, tri.p2()->y, tri.p3()->x, tri.p3()->y); + const double d3 = sign(vx, vy, tri.p3()->x, tri.p3()->y, tri.p1()->x, tri.p1()->y); + + const bool neg = (d1 < 0) || (d2 < 0) || (d3 < 0); + const bool pos = (d1 > 0) || (d2 > 0) || (d3 > 0); + + return !(neg && pos); +} + +template +__device__ void tri_interpolation( + const r_triangle &tri, + raster_interpolation_config &ri_conf, + int next_hit_px, + int next_hit_py) { + auto area = []( + const double p1x, const double p1y, + const double p2x, const double p2y, + const double p3x, const double p3y) -> double { + auto distance = []( + const double p1x, const double p1y, + const double p2x, const double p2y) -> double { + auto sqr = [](const double v) -> double { + return v * v; + }; + return cuda::std::sqrt(sqr(p1x - p2x) + sqr(p1y - p2y)); + }; + const double a = distance(p1x, p1y, p2x, p2y); + const double b = distance(p2x, p2y, p3x, p3y); + const double c = distance(p3x, p3y, p1x, p1y); + const double s = (a + b + c) / 2.; + return std::sqrt(s * (s - a) * (s - b) * (s - c)); + }; + + const double area_1 = area(next_hit_px, next_hit_py, tri.p2()->x, tri.p2()->y, tri.p3()->x, tri.p3()->y); + const double area_2 = area(next_hit_px, next_hit_py, tri.p3()->x, tri.p3()->y, tri.p1()->x, tri.p1()->y); + const double area_3 = area(next_hit_px, next_hit_py, tri.p1()->x, tri.p1()->y, tri.p2()->x, tri.p2()->y); + const double area_sum = area_1 + area_2 + area_3; + + const double weight_1 = area_1 / area_sum; + const double weight_2 = area_2 / area_sum; + const double weight_3 = area_3 / area_sum; + + const double weighted_value = + tri.p1()->v * weight_1 + + tri.p2()->v * weight_2 + + tri.p3()->v * weight_3; + + double nearest_weight; + + if constexpr(RasterFilterMinMax) { + int rx, ry; + cuda::std::tie(nearest_weight, rx, ry) = cuda::std::max( + {cuda::std::tuple{weight_1, tri.absolute_rx1(), tri.absolute_ry1()}, + cuda::std::tuple{weight_2, tri.absolute_rx2(), tri.absolute_ry2()}, + cuda::std::tuple{weight_3, tri.absolute_rx3(), tri.absolute_ry3()}}, + [](auto v1, auto v2) { + return cuda::std::get<0>(v1) < cuda::std::get<0>(v2); + }); + + ri_conf.pixels.push_back(next_hit_px, next_hit_py, {weighted_value, nearest_weight, rx, ry}); + } else { // RasterFilterNone + nearest_weight = cuda::std::max({weight_1, weight_2, weight_3}); + + ri_conf.pixels.add_result_reduce(next_hit_px, next_hit_py, weighted_value, nearest_weight); + } +} + +template +__device__ void multiple_tri_interpolations( + const r_triangle &tri, + raster_interpolation_config &ri_conf, + int fx, + int tx, + int fy, + int ty, + int hits, + uint32_t base) { + int hits_left = __popc(hits); + bool hits_non_empty = hits_left > 0; + uint32_t hit_index = base; + + const int box_width = tx - fx + 1; + + int next_hit_px = -1; + int next_hit_py = -1; + while (hits_non_empty) { + next_hit_px = -1; + while (next_hit_px == -1) { + if (hits & 0x80000000) { + next_hit_px = fx + hit_index % box_width; + next_hit_py = fy + hit_index / box_width; + } + hits = hits << 1; + hit_index++; + } + + tri_interpolation(tri, ri_conf, next_hit_px, next_hit_py); + + hits_left--; + hits_non_empty = hits_left > 0; + } +} + +template +__global__ void filter_averaging_none(raster_interpolation_config ri_conf) { + const int idx = blockIdx.x*blockSizeX + threadIdx.x; + const int idy = blockIdx.y*blockSizeY + threadIdx.y; + + const int y_stride = blockSizeY * gridDim.y; + const int x_stride = blockSizeX * gridDim.x; + + double *average_values = ri_conf.pixels.average_values; // is also output + double *average_weight_sum = ri_conf.pixels.average_weight_sum; + + for (int vy = idy; vy < ri_conf.vheight; vy += y_stride) { + const int index_base = vy * ri_conf.vwidth; + for (int vx = idx; vx < ri_conf.vwidth; vx += x_stride) { + const int index = index_base + vx; + + average_values[index] = average_values[index] / average_weight_sum[index]; + } + } +} + +template +__global__ void filter_averaging_min_max( + raster_interpolation_config ri_conf, + double* __restrict__ out_image) { + const int idx = blockIdx.x*blockSizeX + threadIdx.x; + const int idy = blockIdx.y*blockSizeY + threadIdx.y; + + const int y_stride = blockSizeY * gridDim.y; + const int x_stride = blockSizeX * gridDim.x; + + for (int vy = idy; vy < ri_conf.vheight; vy += y_stride) { + const int index_base = vy * ri_conf.vwidth; + for (int vx = idx; vx < ri_conf.vwidth; vx += x_stride) { + const int index = index_base + vx; + + pixel_array::pixel_array_header* header_array = &(ri_conf.pixels.header_array[index]); + + const int length = header_array->last; + if (length == 0) { + out_image[index] = cuda::std::numeric_limits::quiet_NaN(); + } else { + constexpr int preallocated = decltype(ri_conf.pixels)::preallocated_v; + const int data_index_base = index * preallocated; + + interpolation_result_min_max i_res = ri_conf.pixels.data_array[data_index_base+0]; + double min_max_value = i_res.value_weighted; + int min_max_rx = i_res.rx; + int min_max_ry = i_res.ry; + + auto min_max = [](double min_max_value, int min_max_rx, int min_max_ry, const interpolation_result_min_max &i_res){ + if constexpr (std::is_same_v) { + return cuda::std::min( + {cuda::std::tuple{min_max_value, min_max_rx, min_max_ry}, + cuda::std::tuple{i_res.value_weighted, i_res.rx, i_res.ry}}, + [](auto v1, auto v2) { + return cuda::std::get<0>(v1) < cuda::std::get<0>(v2); + }); + } else { // filter_max + return cuda::std::max( + {cuda::std::tuple{min_max_value, min_max_rx, min_max_ry}, + cuda::std::tuple{i_res.value_weighted, i_res.rx, i_res.ry}}, + [](auto v1, auto v2) { + return cuda::std::get<0>(v1) < cuda::std::get<0>(v2); + }); + } + }; + + for (int fixed_data_array_i = 1; fixed_data_array_i < cuda::std::min({preallocated, length}); fixed_data_array_i++) { + i_res = ri_conf.pixels.data_array[data_index_base + fixed_data_array_i]; + cuda::std::tie(min_max_value, min_max_rx, min_max_ry) = min_max(min_max_value, min_max_rx, min_max_ry, i_res); + } + + int leftover_length = length - preallocated; + pixel_array::pixel_vector_header* next_array = header_array->next_header; + while (leftover_length > 0) { + const int elements_in_array = cuda::std::min({next_array->allocated_size, leftover_length}); + for (int vector_data_array_i = 0; vector_data_array_i < elements_in_array; vector_data_array_i++) { + i_res = *(next_array->get(vector_data_array_i)); + cuda::std::tie(min_max_value, min_max_rx, min_max_ry) = min_max(min_max_value, min_max_rx, min_max_ry, i_res); + } + leftover_length = leftover_length - next_array->allocated_size; + next_array = next_array->next_header; + } + + // calc running average + double average_value = 0; + double average_weight_sum = 0; + + min_max_rx--; + min_max_ry--; + + auto min_max_filtering = [&](const interpolation_result_min_max &i_res){ + // -> in each loop, filter for (rx, ry) + const int rel_rx = i_res.rx - min_max_rx; + const int rel_ry = i_res.ry - min_max_ry; + if (2 >= rel_rx && rel_rx >= 0 && 2 >= rel_ry && rel_ry >= 0) { + // --> add entry to running average + average_value += i_res.value_weighted * i_res.weight; + average_weight_sum += i_res.weight; + } + }; + + // loop over fixed_data_array + for (int fixed_data_array_i = 0; fixed_data_array_i < cuda::std::min({preallocated, length}); fixed_data_array_i++) { + i_res = ri_conf.pixels.data_array[data_index_base + fixed_data_array_i]; + min_max_filtering(i_res); + } + + // loop over linked array list + leftover_length = length - preallocated; + next_array = header_array->next_header; + while (leftover_length > 0) { + const int elements_in_array = cuda::std::min({next_array->allocated_size, leftover_length}); + for (int vector_data_array_i = 0; vector_data_array_i < elements_in_array; vector_data_array_i++) { + i_res = *(next_array->get(vector_data_array_i)); + min_max_filtering(i_res); + } + leftover_length = leftover_length - next_array->allocated_size; + next_array = next_array->next_header; + } + + // finish running average + average_value = average_value / average_weight_sum; + + // record result in output array + out_image[index] = average_value; + } + } + } + +} + +template +bmp::bitmap to_image( + std::size_t const vwidth, + std::size_t const vheight, + std::vector const& points) { + const ply2image::raster_range range = ply2image::find_raster_range(points); + if(range.w() < 2 || range.h() < 2){ + throw std::runtime_error("raster interpolation requires at least 2 columns and 2 rows"); + } + + fmt::print("raster with origin {:d}x{:d} and size {:d}x{:d}\n", + range.min_x, range.min_y, range.w(), range.h()); + + ply2image::percent_printer progress(30, "base line"); + + // use custom raster_point instead of ply2image::raster_point + int rwidth = range.w(); + int rheight = range.h(); + int rimage_size = rwidth * rheight; + + raster_point *raster_image = new raster_point[rimage_size]; + + // init raster_image, as default constructor is insufficient for this struct + raster_point empty_rpoint = raster_point::empty(); + for (int i=0; i 1) { + fmt::print("Multiple CUDA devices detected.\nDefaulting to device 0.\n"); + } + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + + raster_interpolation_config ri_config(rwidth, rheight, vwidth, vheight); + + // TODO: make this dynamic and check whether enough memory is provided by the gpu + checkCudaErrors(cudaDeviceSetLimit(cudaLimitMallocHeapSize, 1610612736)); + + // TODO: use cudaMallocPitch for 2d memory allocation? + int vpixel_count = vwidth*vheight; + + constexpr int blockDimX = 8; + constexpr int blockDimY = 4; + dim3 blockDim(blockDimX, blockDimY); + + int threadCount = deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor; + const double sharedMemPerThread = static_cast(deviceProp.sharedMemPerMultiprocessor)/deviceProp.maxThreadsPerMultiProcessor; + const double factorSharedMem = std::min(1.0, sharedMemPerThread/(sizeof(raster_point) * (blockDimX + 1) * (blockDimY + 1) / (blockDimX * blockDimY))); + const double targetThreadCount = factorSharedMem * threadCount; + const int newGridLength = static_cast(std::floor(std::sqrt(targetThreadCount/(blockDim.x*blockDim.y)))); + + dim3 gridDim(newGridLength, newGridLength); + + // init pixels of ri_config if min/max filter + if constexpr(RasterFilterMinMax) { + pixel_array &pixels = ri_config.pixels; + checkCudaErrors(cudaMalloc( + &(pixels.header_array), + vpixel_count*(sizeof(decltype(*(pixels.header_array))) + pixels.get_preallocated()*sizeof(interpolation_result_min_max)))); + pixels.data_array = reinterpret_cast(pixels.header_array + vpixel_count); + + init_pixel_array_header<<>>(ri_config); + cudaDeviceSynchronize(); + checkCudaErrors(cudaPeekAtLastError()); + } else { // init pixels of ri_config if none filter + interpolation_results_filter_none &pixels = ri_config.pixels; + checkCudaErrors(cudaMalloc( + &(pixels.average_values), + vpixel_count*2*(sizeof(double)))); + pixels.average_weight_sum = pixels.average_values + vpixel_count; + + checkCudaErrors(cudaMemset( + pixels.average_values, + 0, + vpixel_count*2*(sizeof(double)))); + } + + + int bytesize_rimage = rwidth*rheight*sizeof(raster_point); + raster_point *d_raster_image; + checkCudaErrors(cudaMalloc( + &d_raster_image, + bytesize_rimage)); + checkCudaErrors(cudaMemcpy(d_raster_image, raster_image, bytesize_rimage, cudaMemcpyHostToDevice)); + + checkCudaErrors(cudaFuncSetCacheConfig(raster_interpolation, cudaFuncCachePreferShared)); + raster_interpolation<<>>(ri_config, d_raster_image); + + double* d_out_image; + if constexpr (RasterFilterMinMax) { + cudaMalloc( + &d_out_image, + vpixel_count * sizeof(double) + ); + } + + cudaDeviceSynchronize(); + checkCudaErrors(cudaPeekAtLastError()); + + if constexpr (RasterFilterMinMax) { + checkCudaErrors(cudaFuncSetCacheConfig(filter_averaging_min_max, cudaFuncCachePreferL1)); + filter_averaging_min_max<<>>(ri_config, d_out_image); + } else { + checkCudaErrors(cudaFuncSetCacheConfig(filter_averaging_none, cudaFuncCachePreferL1)); + filter_averaging_none<<>>(ri_config); + d_out_image = ri_config.pixels.average_values; + } + cudaDeviceSynchronize(); + checkCudaErrors(cudaPeekAtLastError()); + + double* out_image_dyn = reinterpret_cast(malloc(vpixel_count * sizeof(double))); + if (out_image_dyn==nullptr) { + throw std::runtime_error("malloc() returned null"); + } + checkCudaErrors(cudaMemcpy(out_image_dyn, d_out_image, vpixel_count * sizeof(double), cudaMemcpyDeviceToHost)); + + const bmp::bitmap::size_type out_image_size(vwidth, vheight); + bmp::bitmap image(out_image_size, out_image_dyn, out_image_dyn + vpixel_count); + + free(out_image_dyn); + + return image; +} + +bmp::bitmap to_image_filter_min( + std::size_t const width, + std::size_t const height, + std::vector const& points) { + return to_image(width, height, points); +} + +bmp::bitmap to_image_filter_max( + std::size_t const width, + std::size_t const height, + std::vector const& points) { + return to_image(width, height, points); +} + +bmp::bitmap to_image_filter_none( + std::size_t const width, + std::size_t const height, + std::vector const& points) { + return to_image(width, height, points); +} + + +} // namespace tmrpcuda +} // namespace ply2image \ No newline at end of file diff --git a/src/ply2image/cuda/tmrp.cuh b/src/ply2image/cuda/tmrp.cuh new file mode 100644 index 0000000..cbaa6ff --- /dev/null +++ b/src/ply2image/cuda/tmrp.cuh @@ -0,0 +1,24 @@ +#pragma once + +#include "../tmrp_common.hpp" + +namespace ply2image { +namespace tmrpcuda { + +bmp::bitmap to_image_filter_min( + std::size_t const width, + std::size_t const height, + std::vector const& points); + +bmp::bitmap to_image_filter_max( + std::size_t const width, + std::size_t const height, + std::vector const& points); + +bmp::bitmap to_image_filter_none( + std::size_t const width, + std::size_t const height, + std::vector const& points); + +} // namespace tmrpcuda +} // namespace ply2image \ No newline at end of file diff --git a/src/ply2image/progress_bar.cpp b/src/ply2image/progress_bar.cpp new file mode 100644 index 0000000..97080c7 --- /dev/null +++ b/src/ply2image/progress_bar.cpp @@ -0,0 +1,71 @@ +#include "progress_bar.hpp" + +#include + +namespace ply2image{ + + +percent_printer::lazy_incer::~lazy_incer(){ + ++printer; +} + +percent_printer::percent_printer(std::size_t const label_width, std::string_view const base_line_label) + : label_width_(label_width){ + if(base_line_label.size() > label_width_){ + throw std::logic_error("label width is larger then specified"); + } + + fmt::print("{:>{}s}: " + "====================================================================================================" + "\n", base_line_label, label_width_); + std::fflush(stdout); +} + +void percent_printer::init(std::string_view const label, std::size_t const count){ + if(label.size() > label_width_){ + throw std::logic_error("label width is larger then specified"); + } + + fmt::print("{:>{}s}: ", label, label_width_); + std::fflush(stdout); + + count_ = count; + prints_ = 0; + i_ = 0; +} + +percent_printer::lazy_incer percent_printer::lazy_inc(){ + return {*this}; +} + +void percent_printer::operator++(){ + ++i_; + + if(count_ == 0){ + throw std::logic_error("percent printer used without init call"); + } + + if(i_ > count_){ + throw std::logic_error("percent printer run out of range"); + } + + auto const percent = + static_cast(std::ceil(static_cast(i_) / static_cast(count_) * 100.)); + + bool need_flush = false; + for(; prints_ < percent; ++prints_){ + fmt::print(">"); + need_flush = true; + } + + if(i_ == count_){ + fmt::print(" done\n"); + need_flush = true; + } + + if(need_flush){ + std::fflush(stdout); + } +} + +} // namespace ply2image \ No newline at end of file diff --git a/src/ply2image/progress_bar.hpp b/src/ply2image/progress_bar.hpp new file mode 100644 index 0000000..c6b6aa2 --- /dev/null +++ b/src/ply2image/progress_bar.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace ply2image{ + +class percent_printer{ + public: + struct lazy_incer{ + percent_printer& printer; + ~lazy_incer(); + }; + percent_printer(std::size_t const label_width, std::string_view const base_line_label); + void init(std::string_view const label, std::size_t const count); + lazy_incer lazy_inc(); + + private: + void operator++(); + std::size_t label_width_ = 0; + std::size_t count_ = 0; + std::size_t prints_ = 0; + std::size_t i_ = 0; +}; + +} // namespace ply2image \ No newline at end of file diff --git a/src/ply2image/tmrp.hpp b/src/ply2image/tmrp.hpp new file mode 100644 index 0000000..1212e8b --- /dev/null +++ b/src/ply2image/tmrp.hpp @@ -0,0 +1,340 @@ +#pragma once + +#include +#include + +#include "../bitmap/bitmap.hpp" +#include "../ply/ply.hpp" +#include "progress_bar.hpp" +#include "tmrp_common.hpp" + +namespace ply2image{ + +inline constexpr auto NaN = std::numeric_limits::quiet_NaN(); + +template +struct raw_pixel; + +template <> +struct raw_pixel{ + double weight; + double value; +}; + +template <> +struct raw_pixel{ + double weight; + double value; + std::int64_t rx; + std::int64_t ry; +}; + +struct none_filter{}; + +template +std::int64_t raster_convert(T const v){ + using limits = std::numeric_limits; + if constexpr(ply::scalar_value){ + if constexpr(std::is_floating_point_v){ + if(v != std::floor(v)){ + throw std::runtime_error("raster property contains at least one non-integer value"); + }else if(v < static_cast(limits::min()) || v > static_cast(limits::max())){ + throw std::runtime_error("raster property value is out of range"); + } + }else if constexpr(std::is_same_v){ + if(v > std::uint64_t(limits::max())){ + throw std::runtime_error("raster property value is out of range"); + } + } + + return static_cast(v); + }else{ + throw std::runtime_error("list type properties are not supported"); + } +} + +struct max_value_filter{ + constexpr auto operator()(std::vector> const& p)const{ + return std::ranges::max_element(p, [](raw_pixel const& a, raw_pixel const& b){ + return a.value < b.value; + }); + } +}; + +struct min_value_filter{ + constexpr auto operator()(std::vector> const& p)const{ + return std::ranges::min_element(p, [](raw_pixel const& a, raw_pixel const& b){ + return a.value < b.value; + }); + } +}; + +// projection -> state-of-the-art (version 1 of 2) +bmp::bitmap>> to_vector_image( + std::size_t const width, + std::size_t const height, + std::vector const& points + ){ + bmp::bitmap>> vector_image(width, height); + for(auto const& p: points){ + auto const x = p.x; + auto const y = p.y; + auto const ix = static_cast(std::floor(x)); + auto const iy = static_cast(std::floor(y)); + auto const xr = x - std::floor(x); + auto const yr = y - std::floor(y); + if(ix < vector_image.w() && iy < vector_image.h()){ + auto const weight = (1.f - xr) * (1.f - yr); + vector_image(ix , iy ).push_back({weight, p.v}); + } + if(ix + 1 < vector_image.w() && iy < vector_image.h()){ + auto const weight = ( xr) * (1.f - yr); + vector_image(ix + 1, iy ).push_back({weight, p.v}); + } + if(ix < vector_image.w() && iy + 1 < vector_image.h()){ + auto const weight = (1.f - xr) * ( yr); + vector_image(ix , iy + 1).push_back({weight, p.v}); + } + if(ix + 1 < vector_image.w() && iy + 1 < vector_image.h()){ + auto const weight = ( xr) * ( yr); + vector_image(ix + 1, iy + 1).push_back({weight, p.v}); + } + } + return vector_image; +} + +raster_range find_raster_range(std::vector const& points){ + using limits = std::numeric_limits; + raster_range range{limits::max(), limits::min(), limits::max(), limits::min()}; + for(auto const& p: points){ + range.min_x = std::min(range.min_x, p.rx); + range.max_x = std::max(range.max_x, p.rx); + range.min_y = std::min(range.min_y, p.ry); + range.max_y = std::max(range.max_y, p.ry); + } + return range; +} + +constexpr double sqr(double const v)noexcept{ + return v * v; +} + +double distance(bmp::point const& a, bmp::point const& b){ + return std::sqrt(sqr(a.x() - b.x()) + sqr(a.y() - b.y())); +} + +double area(std::array, 3> const& t){ + auto const a = distance(t[0], t[1]); + auto const b = distance(t[1], t[2]); + auto const c = distance(t[2], t[0]); + auto const s = (a + b + c) / 2.; + return std::sqrt(s * (s - a) * (s - b) * (s - c)); +} + +constexpr bool is_inside(std::array const& t, bmp::point const& p){ + constexpr auto sign = + [](std::array, 3> const& t){ + return (t[0].x() - t[2].x()) * (t[1].y() - t[2].y()) - (t[1].x() - t[2].x()) * (t[0].y() - t[2].y()); + }; + + auto const d1 = sign({p, {t[0].x, t[0].y}, {t[1].x, t[1].y}}); + auto const d2 = sign({p, {t[1].x, t[1].y}, {t[2].x, t[2].y}}); + auto const d3 = sign({p, {t[2].x, t[2].y}, {t[0].x, t[0].y}}); + + auto const neg = (d1 < 0) || (d2 < 0) || (d3 < 0); + auto const pos = (d1 > 0) || (d2 > 0) || (d3 > 0); + + return !(neg && pos); +} + +// projection -> Triangle-Mesh-Rasterization-Projection (version 2 of 2) +template +bmp::bitmap>> to_vector_image( + std::size_t const width, + std::size_t const height, + std::vector const& points, + RasterFilter const& raster_filter + ){ + auto const range = find_raster_range(points); + if(range.w() < 2 || range.h() < 2){ + throw std::runtime_error("raster interpolation requires at least 2 columns and 2 rows"); + } + + fmt::print("raster with origin {:d}x{:d} and size {:d}x{:d}\n", + range.min_x, range.min_y, range.w(), range.h()); + + percent_printer progress(30, "base line"); + + bmp::bitmap> raster_image(range.w(), range.h()); + progress.init("create raster image", points.size()); + for(auto const& p: points){ + auto const printer = progress.lazy_inc(); + + auto& target_p = raster_image(range.x(p.rx), range.y(p.ry)); + if(target_p){ + throw std::runtime_error(fmt::format("raster point {:d}x{:d} exists twice", p.rx, p.ry)); + } + target_p = p; + } + + progress.init("raster interpolation", (raster_image.h() - 1) * (raster_image.w() - 1)); + bmp::bitmap>> vector_image(width, height); + for(std::size_t iy = 0; iy < raster_image.h() - 1; ++iy){ + for(std::size_t ix = 0; ix < raster_image.w() - 1; ++ix){ + auto const printer = progress.lazy_inc(); + + std::vector region; + region.reserve(4); + + if(auto const p = raster_image(ix, iy)){ + region.push_back(*p); + } + + if(auto const p = raster_image(ix + 1, iy)){ + region.push_back(*p); + } + + if(auto const p = raster_image(ix, iy + 1)){ + region.push_back(*p); + } + + if(auto const p = raster_image(ix + 1, iy + 1)){ + region.push_back(*p); + } + + if(region.size() < 3){ + continue; + } + + std::vector> triangles; + if(region.size() == 3){ + triangles.reserve(1); + triangles.push_back({region[0], region[1], region[2]}); + }else{ + triangles.reserve(4); + triangles.push_back({region[0], region[1], region[2]}); + triangles.push_back({region[1], region[2], region[3]}); + triangles.push_back({region[2], region[3], region[0]}); + triangles.push_back({region[3], region[0], region[1]}); + } + + for(auto const& t: triangles){ + // find integer bounting box around the floating point triangle within the target image + auto const fx = static_cast(std::clamp(static_cast(std::floor( + std::min({t[0].x, t[1].x, t[2].x}))), std::int64_t(0), static_cast(width - 1))); + auto const tx = static_cast(std::clamp(static_cast(std::ceil( + std::max({t[0].x, t[1].x, t[2].x}))), std::int64_t(0), static_cast(width - 1))); + if(tx == fx){ + continue; + } + + auto const fy = static_cast(std::clamp(static_cast(std::floor( + std::min({t[0].y, t[1].y, t[2].y}))), std::int64_t(0), static_cast(height - 1))); + auto const ty = static_cast(std::clamp(static_cast(std::ceil( + std::max({t[0].y, t[1].y, t[2].y}))), std::int64_t(0), static_cast(height - 1))); + if(ty == fy){ + continue; + } + + for(std::size_t y = fy; y <= ty; ++y){ + for(std::size_t x = fx; x <= tx; ++x){ + auto const p = bmp::point(static_cast(x), static_cast(y)); + if(!is_inside(t, p)){ + continue; + } + + std::array const areas{{ + area({p, t[1], t[2]}), + area({p, t[2], t[0]}), + area({p, t[0], t[1]}) + }}; + auto const area_sum = areas[0] + areas[1] + areas[2]; + std::array const weight{{ + areas[0] / area_sum, + areas[1] / area_sum, + areas[2] / area_sum + }}; + + auto const value = + t[0].v * weight[0] + + t[1].v * weight[1] + + t[2].v * weight[2]; + + auto const index = std::max({ + std::pair{weight[0], std::size_t(0)}, + std::pair{weight[1], std::size_t(1)}, + std::pair{weight[2], std::size_t(2)}}).second; + + vector_image(x, y).push_back({weight[index], value, t[index].rx, t[index].ry}); + } + } + } + } + } + + if constexpr(!std::same_as){ + // filter values via raster information + progress.init("reference filter", vector_image.point_count()); + for(auto& p: vector_image){ + auto const printer = progress.lazy_inc(); + + if(p.empty()){ + continue; + } + + auto const iter = raster_filter(p); + std::erase_if(p, + [ref_rx = iter->rx, ref_ry = iter->ry](raw_pixel const& v){ + return std::abs(ref_rx - v.rx) > 1 || std::abs(ref_ry - v.ry) > 1; + }); + + } + } + + return vector_image; +} + +template +bmp::bitmap to_image( + std::size_t const width, + std::size_t const height, + std::vector const& points, + RasterFilter const& ... raster_filter + ){ + using raw_pixel = ply2image::raw_pixel; + + auto const vector_image = to_vector_image(width, height, points, raster_filter ...); + + bmp::bitmap image(width, height, NaN); + std::ranges::transform(vector_image, image.begin(), + [](std::vector const& data){ + if(data.empty()){ + return NaN; + }else [[likely]]{ + if(data.size() == 1){ + return data[0].value; + } + + auto const sum_weight = std::transform_reduce(data.begin(), data.end(), static_cast(0.), std::plus{}, + [](raw_pixel const& v){ + if(v.weight < static_cast(0.)){ + throw std::logic_error("negative weight"); + } + return v.weight; + }); + if(sum_weight == static_cast(0.)){ + return NaN; + } + + auto const value = std::transform_reduce(data.begin(), data.end(), static_cast(0.), std::plus{}, + [](raw_pixel const& v){ + return v.value * v.weight; + }); + return value / sum_weight; + } + }); + + return image; +} + +} // namespace ply2image \ No newline at end of file diff --git a/src/ply2image/tmrp_common.hpp b/src/ply2image/tmrp_common.hpp new file mode 100644 index 0000000..8da3392 --- /dev/null +++ b/src/ply2image/tmrp_common.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +#include "../bitmap/bitmap.hpp" + +namespace ply2image{ + +struct point{ + double x; + double y; + double v; +}; + +struct raster_point{ + double x; + double y; + double v; + std::int64_t rx; + std::int64_t ry; + + operator bmp::point()const{ + return {x, y}; + }; +}; + +struct raster_range{ + std::int64_t min_x; + std::int64_t max_x; + std::int64_t min_y; + std::int64_t max_y; + + std::size_t w()const{ + return static_cast(max_x + 1 - min_x); + } + + std::size_t h()const{ + return static_cast(max_y + 1 - min_y); + } + + std::size_t x(std::int64_t const x)const{ + return static_cast(x - min_x); + } + + std::size_t y(std::int64_t const y)const{ + return static_cast(y - min_y); + } +}; + +ply2image::raster_range find_raster_range(std::vector const& points); + +} // namespace ply2image \ No newline at end of file