From 563ff1bbc0997fc822ff2d0e5c4a8e31a54f5f85 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Mon, 31 Jul 2023 19:24:53 -0700 Subject: [PATCH 01/49] Massive improvements to NanoVDB Signed-off-by: Ken Museth --- doc/nanovdb/HelloWorld.md | 6 +- nanovdb/nanovdb/CMakeLists.txt | 13 +- nanovdb/nanovdb/NanoVDB.h | 4578 ++++++++++++----- nanovdb/nanovdb/PNanoVDB.h | 982 +++- nanovdb/nanovdb/Readme.md | 9 +- .../nanovdb/cmd/convert/nanovdb_convert.cc | 58 +- nanovdb/nanovdb/cmd/print/nanovdb_print.cc | 6 +- nanovdb/nanovdb/examples/CMakeLists.txt | 4 +- .../examples/benchmark/BenchKernels_dense.cu | 32 +- .../examples/benchmark/BenchKernels_nano.cu | 39 +- .../nanovdb/examples/benchmark/Benchmark.cc | 50 +- .../examples/benchmark/Benchmark_dense.cc | 16 +- .../examples/benchmark/Benchmark_nano.cc | 33 +- .../nanovdb/examples/benchmark/DenseGrid.h | 67 +- .../ex_bump_pool_buffer/bump_pool_buffer.cc | 4 +- .../examples/ex_collide_level_set/common.h | 1 + .../examples/ex_collide_level_set/main.cc | 4 +- .../examples/ex_collide_level_set/nanovdb.cu | 7 +- .../examples/ex_collide_level_set/openvdb.cc | 5 +- .../ex_index_grid_cuda/index_grid_cuda.cc | 22 +- .../ex_index_grid_cuda/index_grid_cuda.cu | 14 +- .../make_custom_nanovdb.cc | 11 +- .../make_custom_nanovdb_cuda.cc | 45 + .../make_custom_nanovdb_cuda.cu | 35 + .../make_funny_nanovdb.cc | 15 +- .../ex_make_typed_grids/make_typed_grids.cc | 17 +- .../ex_map_pool_buffer/map_pool_buffer.cc | 4 +- .../modify_nanovdb_thrust.cu | 4 +- .../ex_nodemanager_cuda/nodemanager_cuda.cc | 12 +- .../openvdb_to_nanovdb.cc | 6 +- .../openvdb_to_nanovdb_accessor.cc | 4 +- .../openvdb_to_nanovdb_cuda.cc | 9 +- .../examples/ex_raytrace_fog_volume/common.h | 1 + .../examples/ex_raytrace_fog_volume/main.cc | 4 +- .../ex_raytrace_fog_volume/nanovdb.cu | 3 +- .../ex_raytrace_fog_volume/openvdb.cc | 5 +- .../examples/ex_raytrace_level_set/common.h | 1 + .../examples/ex_raytrace_level_set/main.cc | 4 +- .../examples/ex_raytrace_level_set/nanovdb.cu 
| 4 +- .../examples/ex_raytrace_level_set/openvdb.cc | 5 +- .../read_nanovdb_sphere_accessor_cuda.cc | 2 +- nanovdb/nanovdb/examples/ex_util/CpuTimer.h | 52 - .../examples/ex_vox_to_nanovdb/VoxToNanoVDB.h | 12 +- .../ex_voxels_to_grid_cuda.cu | 53 + .../write_nanovdb_grids.cc | 2 +- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 2227 ++++++-- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 2180 ++++++++ nanovdb/nanovdb/unittest/TestOpenVDB.cc | 439 +- .../unittest/pnanovdb_validate_strides.h | 22 +- nanovdb/nanovdb/util/CpuTimer.h | 83 + nanovdb/nanovdb/util/CreateNanoGrid.h | 2079 ++++++++ nanovdb/nanovdb/util/CudaDeviceBuffer.h | 197 - nanovdb/nanovdb/util/DitherLUT.h | 2 +- nanovdb/nanovdb/util/GridBuilder.h | 3070 ++++++----- nanovdb/nanovdb/util/GridChecksum.h | 90 +- nanovdb/nanovdb/util/GridHandle.h | 415 +- nanovdb/nanovdb/util/GridStats.h | 28 +- nanovdb/nanovdb/util/GridValidator.h | 4 +- nanovdb/nanovdb/util/HostBuffer.h | 22 +- nanovdb/nanovdb/util/IO.h | 334 +- nanovdb/nanovdb/util/IndexGridBuilder.h | 652 --- nanovdb/nanovdb/util/Invoke.h | 2 +- nanovdb/nanovdb/util/NanoToOpenVDB.h | 32 +- nanovdb/nanovdb/util/NodeManager.h | 14 +- nanovdb/nanovdb/util/OpenToNanoVDB.h | 1489 +----- nanovdb/nanovdb/util/PrefixSum.h | 79 + nanovdb/nanovdb/util/Primitives.h | 1508 ++++-- nanovdb/nanovdb/util/Ray.h | 18 +- nanovdb/nanovdb/util/Stencils.h | 2 +- nanovdb/nanovdb/util/cuda/CudaAddBlindData.h | 94 + nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h | 185 + nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h | 370 ++ nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h | 1046 ++++ .../nanovdb/util/cuda/CudaSignedFloodFill.h | 190 + nanovdb/nanovdb/util/cuda/CudaUtils.h | 119 + nanovdb/nanovdb/util/cuda/GpuTimer.h | 104 + openvdb_cmd/vdb_tool/include/Tool.h | 38 +- 77 files changed, 16567 insertions(+), 6827 deletions(-) create mode 100644 nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc create mode 100644 
nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu delete mode 100644 nanovdb/nanovdb/examples/ex_util/CpuTimer.h create mode 100644 nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu create mode 100644 nanovdb/nanovdb/unittest/TestNanoVDB.cu create mode 100644 nanovdb/nanovdb/util/CpuTimer.h create mode 100644 nanovdb/nanovdb/util/CreateNanoGrid.h delete mode 100644 nanovdb/nanovdb/util/CudaDeviceBuffer.h delete mode 100644 nanovdb/nanovdb/util/IndexGridBuilder.h create mode 100644 nanovdb/nanovdb/util/PrefixSum.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaAddBlindData.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h create mode 100644 nanovdb/nanovdb/util/cuda/CudaUtils.h create mode 100644 nanovdb/nanovdb/util/cuda/GpuTimer.h diff --git a/doc/nanovdb/HelloWorld.md b/doc/nanovdb/HelloWorld.md index bddc0a1e3d..2bc5d98328 100644 --- a/doc/nanovdb/HelloWorld.md +++ b/doc/nanovdb/HelloWorld.md @@ -4,7 +4,7 @@ ```cpp #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) #include // Convert an openvdb level set sphere into a nanovdb, use accessors to print out multiple values from both @@ -17,7 +17,7 @@ int main() auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Convert the OpenVDB grid, srcGrid, into a NanoVDB grid handle. - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); // Define a (raw) pointer to the NanoVDB grid on the host. Note we match the value type of the srcGrid! 
auto* dstGrid = handle.grid(); @@ -77,7 +77,7 @@ int main() ```cpp #include // this is required to read (and write) NanoVDB files on the host -#include // required for CUDA memory management +#include // required for CUDA memory management extern "C" void launch_kernels(const nanovdb::NanoGrid*, const nanovdb::NanoGrid*, diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index d20b4928f3..2e569bab80 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -76,7 +76,7 @@ if(NANOVDB_BUILD_UNITTESTS OR NANOVDB_BUILD_BENCHMARK) endif() if(NANOVDB_USE_CUDA) - set(CMAKE_CUDA_STANDARD 11) + set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) @@ -167,8 +167,16 @@ set(NANOVDB_INCLUDE_FILES # NanoVDB util header files set(NANOVDB_INCLUDE_UTILFILES + util/CpuTimer.h + util/CreateNanoGrid.h util/CSampleFromVoxels.h - util/CudaDeviceBuffer.h + util/cuda/CudaAddBlindData.h + util/cuda/CudaDeviceBuffer.h + util/cuda/CudaIndexToGrid.h + util/cuda/CudaPointsToGrid.h + util/cuda/CudaSignedFloodFill.h + util/cuda/CudaUtils.h + util/cuda/GpuTimer.h util/DitherLUT.h util/ForEach.h util/GridBuilder.h @@ -183,6 +191,7 @@ set(NANOVDB_INCLUDE_UTILFILES util/NanoToOpenVDB.h util/NodeManager.h util/OpenToNanoVDB.h + util/PrefixSum.h util/Primitives.h util/Range.h util/Ray.h diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index b1fe3ee433..fbe81519a5 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -29,7 +29,7 @@ structure can safely be ignored by most client codes)! - \warning NanoVDB grids can only be constructed via tools like openToNanoVDB + \warning NanoVDB grids can only be constructed via tools like createNanoGrid or the GridBuilder. This explains why none of the grid nodes defined below have public constructors or destructors. 
@@ -121,17 +121,26 @@ #define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t) #define NANOVDB_MAJOR_VERSION_NUMBER 32 // reflects changes to the ABI and hence also the file format -#define NANOVDB_MINOR_VERSION_NUMBER 4 // reflects changes to the API but not ABI -#define NANOVDB_PATCH_VERSION_NUMBER 2 // reflects changes that does not affect the ABI or API +#define NANOVDB_MINOR_VERSION_NUMBER 5 // reflects changes to the API but not ABI +#define NANOVDB_PATCH_VERSION_NUMBER 1 // reflects changes that does not affect the ABI or API + +#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 // This replaces a Coord key at the root level with a single uint64_t -#define USE_SINGLE_ROOT_KEY +#define NANOVDB_USE_SINGLE_ROOT_KEY + +// This allows for the old (deprecated) indexing scheme for ValueOnIndex +//#define NANOVDB_USE_OLD_VALUE_ON_INDEX // This replaces three levels of Coord keys in the ReadAccessor with one Coord -//#define USE_SINGLE_ACCESSOR_KEY +//#define NANOVDB_USE_SINGLE_ACCESSOR_KEY +// Use this to switch between std::ofstream or FILE implementations //#define NANOVDB_USE_IOSTREAMS +// Use this to switch between old and new accessor methods +#define NANOVDB_NEW_ACCESSOR_METHODS + #define NANOVDB_FPN_BRANCHLESS #define NANOVDB_DATA_ALIGNMENT 32 @@ -186,11 +195,26 @@ typedef unsigned long long uint64_t; #endif // __CUDACC_RTC__ #if defined(__CUDACC__) || defined(__HIP__) -// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler -#define __hostdev__ __host__ __device__ +// Only define __hostdev__ when using NVIDIA CUDA or HIP compilers +#ifndef __hostdev__ +#define __hostdev__ __host__ __device__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif #else -#define __hostdev__ +// Dummy definitions of macros only defined by CUDA and HIP compilers +#ifndef __hostdev__ +#define __hostdev__ // Runs on the CPU and GPU, called from the CPU or the GPU #endif +#ifndef __global__ +#define __global__ // Runs on the 
GPU, called from the CPU or the GPU +#endif +#ifndef __device__ +#define __device__ // Runs on the GPU, called from the GPU +#endif +#ifndef __host__ +#define __host__ // Runs on the CPU, called from the CPU +#endif + +#endif // if defined(__CUDACC__) || defined(__HIP__) // The following macro will suppress annoying warnings when nvcc // compiles functions that call (host) intrinsics (which is perfectly valid) @@ -202,6 +226,13 @@ typedef unsigned long long uint64_t; #define NANOVDB_HOSTDEV_DISABLE_WARNING #endif +// Define compiler warnings that work with all compilers +//#if defined(_MSC_VER) +//#define NANO_WARNING(msg) _pragma("message" #msg) +//#else +//#define NANO_WARNING(msg) _Pragma("message" #msg) +//#endif + // A portable implementation of offsetof - unfortunately it doesn't work with static_assert #define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0)) @@ -210,25 +241,59 @@ namespace nanovdb { // --------------------------> Build types <------------------------------------ /// @brief Dummy type for a voxel whose value equals an offset into an external value array -class ValueIndex {}; +class ValueIndex +{ +}; + +/// @brief Dummy type for a voxel whose value equals an offset into an external value array of active values +class ValueOnIndex +{ +}; + +/// @brief Like @c ValueIndex but with a mutable mask +class ValueIndexMask +{ +}; + +/// @brief Like @c ValueOnIndex but with a mutable mask +class ValueOnIndexMask +{ +}; /// @brief Dummy type for a voxel whose value equals its binary active state -class ValueMask {}; +class ValueMask +{ +}; /// @brief Dummy type for a 16 bit floating point values -class Half {}; +class Half +{ +}; /// @brief Dummy type for a 4bit quantization of float point values -class Fp4 {}; +class Fp4 +{ +}; /// @brief Dummy type for a 8bit quantization of float point values -class Fp8 {}; +class Fp8 +{ +}; /// @brief Dummy type for a 16bit quantization of float point values -class Fp16 {}; 
+class Fp16 +{ +}; /// @brief Dummy type for a variable bit quantization of floating point values -class FpN {}; +class FpN +{ +}; + +/// @brief Dummy type for indexing points into voxels +class Points +{ +}; // --------------------------> GridType <------------------------------------ @@ -240,62 +305,67 @@ class FpN {}; /// 3) Verify that the ConvertTrait in NanoToOpenVDB.h works correctly with the new type /// 4) Add the new type to mapToGridType (defined below) that maps NanoVDB types to GridType /// 5) Add the new type to toStr (defined below) -enum class GridType : uint32_t { Unknown = 0, - Float = 1,// single precision floating point value - Double = 2,// double precision floating point value - Int16 = 3,// half precision signed integer value - Int32 = 4,// single precision signed integer value - Int64 = 5,// double precision signed integer value - Vec3f = 6,// single precision floating 3D vector - Vec3d = 7,// double precision floating 3D vector - Mask = 8,// no value, just the active state - Half = 9,// half precision floating point value - UInt32 = 10,// single precision unsigned integer value - Boolean = 11,// boolean value, encoded in bit array - RGBA8 = 12,// RGBA packed into 32bit word in reverse-order. R in low bits. 
- Fp4 = 13,// 4bit quantization of float point value - Fp8 = 14,// 8bit quantization of float point value - Fp16 = 15,// 16bit quantization of float point value - FpN = 16,// variable bit quantization of floating point value - Vec4f = 17,// single precision floating 4D vector - Vec4d = 18,// double precision floating 4D vector - Index = 19,// index into an external array of values - End = 20 }; +enum class GridType : uint32_t { Unknown = 0, // unknown value type - should rarely be used + Float = 1, // single precision floating point value + Double = 2, // double precision floating point value + Int16 = 3, // half precision signed integer value + Int32 = 4, // single precision signed integer value + Int64 = 5, // double precision signed integer value + Vec3f = 6, // single precision floating 3D vector + Vec3d = 7, // double precision floating 3D vector + Mask = 8, // no value, just the active state + Half = 9, // half precision floating point value + UInt32 = 10, // single precision unsigned integer value + Boolean = 11, // boolean value, encoded in bit array + RGBA8 = 12, // RGBA packed into 32bit word in reverse-order, i.e. R is lowest byte. 
+ Fp4 = 13, // 4bit quantization of floating point value + Fp8 = 14, // 8bit quantization of floating point value + Fp16 = 15, // 16bit quantization of floating point value + FpN = 16, // variable bit quantization of floating point value + Vec4f = 17, // single precision floating 4D vector + Vec4d = 18, // double precision floating 4D vector + Index = 19, // index into an external array of active and inactive values + OnIndex = 20, // index into an external array of active values + IndexMask = 21, // like Index but with a mutable mask + OnIndexMask = 22, // like OnIndex but with a mutable mask + PointIndex = 23, // voxels encode indices to co-located points + Vec3u8 = 24, // 8bit quantization of floating point 3D vector (only as blind data) + Vec3u16 = 25, // 16bit quantization of floating point 3D vector (only as blind data) + End = 26 }; // should never be used #ifndef __CUDACC_RTC__ /// @brief Retuns a c-string used to describe a GridType inline const char* toStr(GridType gridType) { - static const char * LUT[] = { "?", "float", "double" , "int16", "int32", - "int64", "Vec3f", "Vec3d", "Mask", "Half", - "uint32", "bool", "RGBA8", "Float4", "Float8", - "Float16", "FloatN", "Vec4f", "Vec4d", "Index", "End" }; - static_assert( sizeof(LUT)/sizeof(char*) - 1 == int(GridType::End), "Unexpected size of LUT" ); + static const char* LUT[] = {"?", "float", "double", "int16", "int32", "int64", "Vec3f", "Vec3d", "Mask", "Half", + "uint32", "bool", "RGBA8", "Float4", "Float8", "Float16", "FloatN", "Vec4f", "Vec4d", + "Index", "OnIndex", "IndexMask", "OnIndexMask", "PointIndex", "Vec3u8", "Vec3u16", "End"}; + static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(GridType::End), "Unexpected size of LUT"); return LUT[static_cast(gridType)]; } #endif // --------------------------> GridClass <------------------------------------ -/// @brief Classes (defined in OpenVDB) that are currently supported by NanoVDB +/// @brief Classes (superset of OpenVDB) that are currently supported by 
NanoVDB enum class GridClass : uint32_t { Unknown = 0, - LevelSet = 1, // narrow band level set, e.g. SDF - FogVolume = 2, // fog volume, e.g. density - Staggered = 3, // staggered MAC grid, e.g. velocity - PointIndex = 4, // point index grid - PointData = 5, // point data grid - Topology = 6, // grid with active states only (no values) - VoxelVolume = 7, // volume of geometric cubes, e.g. Minecraft - IndexGrid = 8,// grid whose values are offsets, e.g. into an external array - End = 9 }; + LevelSet = 1, // narrow band level set, e.g. SDF + FogVolume = 2, // fog volume, e.g. density + Staggered = 3, // staggered MAC grid, e.g. velocity + PointIndex = 4, // point index grid + PointData = 5, // point data grid + Topology = 6, // grid with active states only (no values) + VoxelVolume = 7, // volume of geometric cubes, e.g. colored cubes in Minecraft + IndexGrid = 8, // grid whose values are offsets, e.g. into an external array + TensorGrid = 9, // Index grid specifically indexing learnable tensor features + End = 10 }; #ifndef __CUDACC_RTC__ /// @brief Retuns a c-string used to describe a GridClass inline const char* toStr(GridClass gridClass) { - static const char * LUT[] = { "?", "SDF", "FOG" , "MAC", "PNTIDX", - "PNTDAT", "TOPO", "VOX", "INDEX", "END" }; - static_assert( sizeof(LUT)/sizeof(char*) - 1 == int(GridClass::End), "Unexpected size of LUT" ); + static const char* LUT[] = {"?", "SDF", "FOG", "MAC", "PNTIDX", "PNTDAT", "TOPO", "VOX", "INDEX", "TENSOR", "END"}; + static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(GridClass::End), "Unexpected size of LUT"); return LUT[static_cast(gridClass)]; } #endif @@ -304,27 +374,27 @@ inline const char* toStr(GridClass gridClass) /// @brief Grid flags which indicate what extra information is present in the grid buffer. 
enum class GridFlags : uint32_t { - HasLongGridName = 1 << 0,// grid name is longer than 256 characters - HasBBox = 1 << 1,// nodes contain bounding-boxes of active values - HasMinMax = 1 << 2,// nodes contain min/max of active values - HasAverage = 1 << 3,// nodes contain averages of active values - HasStdDeviation = 1 << 4,// nodes contain standard deviations of active values - IsBreadthFirst = 1 << 5,// nodes are arranged breadth-first in memory - End = 1 << 6, + HasLongGridName = 1 << 0, // grid name is longer than 256 characters + HasBBox = 1 << 1, // nodes contain bounding-boxes of active values + HasMinMax = 1 << 2, // nodes contain min/max of active values + HasAverage = 1 << 3, // nodes contain averages of active values + HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values + IsBreadthFirst = 1 << 5, // nodes are arranged breadth-first in memory + End = 1 << 6, // use End - 1 as a mask for the 6 lower bit flags }; #ifndef __CUDACC_RTC__ /// @brief Retuns a c-string used to describe a GridFlags inline const char* toStr(GridFlags gridFlags) { - static const char * LUT[] = { "has long grid name", - "has bbox", - "has min/max", - "has average", - "has standard deviation", - "is breadth-first", - "end" }; - static_assert( 1 << (sizeof(LUT)/sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT" ); + static const char* LUT[] = {"has long grid name", + "has bbox", + "has min/max", + "has average", + "has standard deviation", + "is breadth-first", + "end"}; + static_assert(1 << (sizeof(LUT) / sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT"); return LUT[static_cast(gridFlags)]; } #endif @@ -341,13 +411,16 @@ enum class GridBlindDataClass : uint32_t { Unknown = 0, /// @brief Blind-data Semantics that are currently understood by NanoVDB enum class GridBlindDataSemantic : uint32_t { Unknown = 0, - PointPosition = 1, + PointPosition = 1, // 3D coordinates in an unknown space PointColor = 2, PointNormal = 3, 
PointRadius = 4, PointVelocity = 5, PointId = 6, - End = 8 }; + WorldCoords = 7, // 3D coordinates in world space, e.g. (0.056, 0.8, 1.8) + GridCoords = 8, // 3D coordinates in grid space, e.g. (1.2, 4.0, 5.7), aka index-space + VoxelCoords = 9, // 3D coordinates in voxel space, e.g. (0.2, 0.0, 0.7) + End = 10 }; // --------------------------> is_same <------------------------------------ @@ -364,6 +437,47 @@ struct is_same static constexpr bool value = true; }; +// --------------------------> is_floating_point <------------------------------------ + +/// @brief C++11 implementation of std::is_floating_point +template +struct is_floating_point +{ + static constexpr bool value = is_same::value || is_same::value; +}; + +// --------------------------> BuildTraits <------------------------------------ + +/// @brief Define static boolean tests for template build types +template +struct BuildTraits +{ + // check if T is an index type + static constexpr bool is_index = is_same::value || + is_same::value || + is_same::value || + is_same::value; + static constexpr bool is_onindex = is_same::value || + is_same::value; + static constexpr bool is_offindex = is_same::value || + is_same::value; + static constexpr bool is_indexmask = is_same::value || + is_same::value; + // check if T is a compressed float type with fixed bit precision + static constexpr bool is_FpX = is_same::value || + is_same::value || + is_same::value; + // check if T is a compressed float type with fixed or variable bit precision + static constexpr bool is_Fp = is_FpX || is_same::value; + // check if T is a POD float type, i.e. float or double + static constexpr bool is_float = is_floating_point::value; + // check if T is a template specialization of LeafData, i.e. 
has T mValues[512] + static constexpr bool is_special = is_index || is_Fp || + is_same::value || + is_same::value || + is_same::value; +}; // BuildTraits + // --------------------------> enable_if <------------------------------------ /// @brief C++11 implementation of std::enable_if @@ -378,6 +492,19 @@ struct enable_if using type = T; }; +// --------------------------> disable_if <------------------------------------ + +template +struct disable_if +{ + typedef T type; +}; + +template +struct disable_if +{ +}; + // --------------------------> is_const <------------------------------------ template @@ -406,13 +533,18 @@ struct remove_const using type = T; }; -// --------------------------> is_floating_point <------------------------------------ +// --------------------------> match_const <------------------------------------ -/// @brief C++11 implementation of std::is_floating_point -template -struct is_floating_point +template +struct match_const +{ + using type = typename remove_const::type; +}; + +template +struct match_const { - static const bool value = is_same::value || is_same::value; + using type = const typename remove_const::type; }; // --------------------------> is_specialization <------------------------------------ @@ -422,6 +554,8 @@ struct is_floating_point /// given in the second template parameter. /// /// @details is_specialization, Vec3>::value == true; +/// is_specialization::value == true; +/// is_specialization, std::vector>::value == true; template class TemplateType> struct is_specialization { @@ -433,10 +567,10 @@ struct is_specialization, TemplateType> static const bool value = true; }; -// --------------------------> Value Map <------------------------------------ +// --------------------------> BuildToValueMap <------------------------------------ /// @brief Maps one type (e.g. 
the build types above) to other (actual) types -template +template struct BuildToValueMap { using Type = T; @@ -450,6 +584,27 @@ struct BuildToValueMap using type = uint64_t; }; +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + template<> struct BuildToValueMap { @@ -492,6 +647,13 @@ struct BuildToValueMap using type = float; }; +template<> +struct BuildToValueMap +{ + using Type = uint64_t; + using type = uint64_t; +}; + // --------------------------> utility functions related to alignment <------------------------------------ /// @brief return true if the specified pointer is aligned @@ -529,24 +691,44 @@ __hostdev__ inline static const T* alignPtr(const T* p) return reinterpret_cast( (const uint8_t*)p + alignmentPadding(p) ); } -// --------------------------> PtrDiff PtrAdd <------------------------------------ +// --------------------------> PtrDiff <------------------------------------ -template +/// @brief Compute the distance, in bytes, between two pointers +/// @tparam T1 Type of the first pointer +/// @tparam T2 Type of the second pointer +/// @param p first pointer, assumed to NOT be NULL +/// @param q second pointer, assumed to NOT be NULL +/// @return signed distance between pointer addresses in units of bytes +template __hostdev__ inline static int64_t PtrDiff(const T1* p, const T2* q) { NANOVDB_ASSERT(p && q); return reinterpret_cast(p) - reinterpret_cast(q); } -template -__hostdev__ inline static DstT* PtrAdd(SrcT *p, int64_t offset) +// --------------------------> PtrAdd <------------------------------------ + +/// @brief Adds a byte offset to a non-const pointer to produce another non-const pointer +/// @tparam DstT Type of the return pointer +/// @tparam SrcT Type of the input pointer +/// @param p non-const input 
pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a non-const pointer defined as the offset of an input pointer +template +__hostdev__ inline static DstT* PtrAdd(SrcT* p, int64_t offset) { NANOVDB_ASSERT(p); return reinterpret_cast(reinterpret_cast(p) + offset); } -template -__hostdev__ inline static const DstT* PtrAdd(const SrcT *p, int64_t offset) +/// @brief Adds a byte offset of a const pointer to produce another const pointer +/// @tparam DstT Type of the return pointer +/// @tparam SrcT Type of the input pointer +/// @param p const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a const pointer defined as the offset of a const input pointer +template +__hostdev__ inline static const DstT* PtrAdd(const SrcT* p, int64_t offset) { NANOVDB_ASSERT(p); return reinterpret_cast(reinterpret_cast(p) + offset); @@ -557,66 +739,122 @@ __hostdev__ inline static const DstT* PtrAdd(const SrcT *p, int64_t offset) /// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int class Rgba8 { - union { - uint8_t c[4];// 4 color channels of red, green, blue and alpha components. - uint32_t packed;// 32 bit packed representation + union + { + uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. 
+ uint32_t packed; // 32 bit packed representation } mData; + public: static const int SIZE = 4; using ValueType = uint8_t; Rgba8(const Rgba8&) = default; Rgba8(Rgba8&&) = default; - Rgba8& operator=(Rgba8&&) = default; - Rgba8& operator=(const Rgba8&) = default; - __hostdev__ Rgba8() : mData{{0,0,0,0}} {static_assert(sizeof(uint32_t) == sizeof(Rgba8),"Unexpected sizeof");} - __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) : mData{{r, g, b, a}} {} - explicit __hostdev__ Rgba8(uint8_t v) : Rgba8(v,v,v,v) {} + Rgba8& operator=(Rgba8&&) = default; + Rgba8& operator=(const Rgba8&) = default; + + /// @brief Default ctor initializes all channels to zero + __hostdev__ Rgba8() + : mData{{0, 0, 0, 0}} + { + static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof"); + } + + /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0u to 255u + __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) + : mData{{r, g, b, a}} + { + } + + /// @brief ctor where all channels are initialized to the same value + /// @note value should be in the range 0u to 255u + explicit __hostdev__ Rgba8(uint8_t v) + : mData{{v, v, v, v}} + { + } + + /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0.0f to 1.0f __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) - : mData{{(uint8_t(0.5f + r * 255.0f)), // round to nearest - (uint8_t(0.5f + g * 255.0f)), // round to nearest - (uint8_t(0.5f + b * 255.0f)), // round to nearest - (uint8_t(0.5f + a * 255.0f))}}// round to nearest + : mData{{static_cast(0.5f + r * 255.0f), // round floats to nearest integers + static_cast(0.5f + g * 255.0f), // double {{}} is needed due to union + static_cast(0.5f + b * 255.0f), + static_cast(0.5f + a * 255.0f)}} { } - __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } - __hostdev__ 
bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } + __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } + __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } __hostdev__ float lengthSqr() const { - return 0.0000153787005f*(float(mData.c[0])*mData.c[0] + - float(mData.c[1])*mData.c[1] + - float(mData.c[2])*mData.c[2]);//1/255^2 + return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] + + float(mData.c[1]) * mData.c[1] + + float(mData.c[2]) * mData.c[2]); //1/255^2 } - __hostdev__ float length() const { return sqrtf(this->lengthSqr() ); } - __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } - __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } + __hostdev__ float length() const { return sqrtf(this->lengthSqr()); } + __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } + __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } __hostdev__ const uint32_t& packed() const { return mData.packed; } - __hostdev__ uint32_t& packed() { return mData.packed; } - __hostdev__ const uint8_t& r() const { return mData.c[0]; } - __hostdev__ const uint8_t& g() const { return mData.c[1]; } - __hostdev__ const uint8_t& b() const { return mData.c[2]; } - __hostdev__ const uint8_t& a() const { return mData.c[3]; } - __hostdev__ uint8_t& r() { return mData.c[0]; } - __hostdev__ uint8_t& g() { return mData.c[1]; } - __hostdev__ uint8_t& b() { return mData.c[2]; } - __hostdev__ uint8_t& a() { return mData.c[3]; } -};// Rgba8 - -using PackedRGBA8 = Rgba8;// for backwards compatibility - -// --------------------------> isValue(GridType, GridClass) <------------------------------------ - -/// @brief return true if the GridType maps to a floating point value. 
+ __hostdev__ uint32_t& packed() { return mData.packed; } + __hostdev__ const uint8_t& r() const { return mData.c[0]; } + __hostdev__ const uint8_t& g() const { return mData.c[1]; } + __hostdev__ const uint8_t& b() const { return mData.c[2]; } + __hostdev__ const uint8_t& a() const { return mData.c[3]; } + __hostdev__ uint8_t& r() { return mData.c[0]; } + __hostdev__ uint8_t& g() { return mData.c[1]; } + __hostdev__ uint8_t& b() { return mData.c[2]; } + __hostdev__ uint8_t& a() { return mData.c[3]; } +}; // Rgba8 + +using PackedRGBA8 = Rgba8; // for backwards compatibility + +// --------------------------> isFloatingPoint(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a floating point type __hostdev__ inline bool isFloatingPoint(GridType gridType) { - return gridType == GridType::Float || + return gridType == GridType::Float || gridType == GridType::Double || - gridType == GridType::Fp4 || - gridType == GridType::Fp8 || - gridType == GridType::Fp16 || + gridType == GridType::Fp4 || + gridType == GridType::Fp8 || + gridType == GridType::Fp16 || gridType == GridType::FpN; } +// --------------------------> isFloatingPointVector(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a floating point vec3. +__hostdev__ inline bool isFloatingPointVector(GridType gridType) +{ + return gridType == GridType::Vec3f || + gridType == GridType::Vec3d || + gridType == GridType::Vec4f || + gridType == GridType::Vec4d; +} + +// --------------------------> isInteger(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a index type. 
+__hostdev__ inline bool isInteger(GridType gridType) +{ + return gridType == GridType::Int16 || + gridType == GridType::Int32 || + gridType == GridType::Int64 || + gridType == GridType::UInt32; +} + +// --------------------------> isIndex(GridType) <------------------------------------ + +/// @brief return true if the GridType maps to a index type. +__hostdev__ inline bool isIndex(GridType gridType) +{ + return gridType == GridType::Index || + gridType == GridType::OnIndex || + gridType == GridType::IndexMask || + gridType == GridType::OnIndexMask; +} + // --------------------------> isValue(GridType, GridClass) <------------------------------------ /// @brief return true if the combination of GridType and GridClass is valid. @@ -625,18 +863,58 @@ __hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) if (gridClass == GridClass::LevelSet || gridClass == GridClass::FogVolume) { return isFloatingPoint(gridType); } else if (gridClass == GridClass::Staggered) { - return gridType == GridType::Vec3f || gridType == GridType::Vec3d || - gridType == GridType::Vec4f || gridType == GridType::Vec4d; - } else if (gridClass == GridClass::PointIndex || gridClass == GridClass::PointData) { - return gridType == GridType::UInt32; + return isFloatingPointVector(gridType); + } else if (gridClass == GridClass::PointIndex || gridClass == GridClass::PointData) { + return gridType == GridType::PointIndex || gridType == GridType::UInt32; } else if (gridClass == GridClass::Topology) { return gridType == GridType::Mask; } else if (gridClass == GridClass::IndexGrid) { - return gridType == GridType::Index; + return isIndex(gridType); } else if (gridClass == GridClass::VoxelVolume) { - return gridType == GridType::RGBA8 || gridType == GridType::Float || gridType == GridType::Double || gridType == GridType::Vec3f || gridType == GridType::Vec3d || gridType == GridType::UInt32; + return gridType == GridType::RGBA8 || gridType == GridType::Float || + gridType == GridType::Double 
|| gridType == GridType::Vec3f || + gridType == GridType::Vec3d || gridType == GridType::UInt32; } - return gridClass < GridClass::End && gridType < GridType::End;// any valid combination + return gridClass < GridClass::End && gridType < GridType::End; // any valid combination +} + +// --------------------------> isValue(GridType, GridClass) <------------------------------------ + +/// @brief return true if the combination of GridBlindDataClass, GridBlindDataSemantic and GridType is valid. +__hostdev__ inline bool isValid(const GridBlindDataClass& blindClass, + const GridBlindDataSemantic& blindSemantics, + const GridType& blindType) +{ + bool test = false; + switch (blindClass) { + case GridBlindDataClass::IndexArray: + test = (blindSemantics == GridBlindDataSemantic::Unknown || + blindSemantics == GridBlindDataSemantic::PointId) && + isInteger(blindType); + break; + case GridBlindDataClass::AttributeArray: + if (blindSemantics == GridBlindDataSemantic::PointPosition || + blindSemantics == GridBlindDataSemantic::WorldCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3d; + } else if (blindSemantics == GridBlindDataSemantic::GridCoords) { + test = blindType == GridType::Vec3f; + } else if (blindSemantics == GridBlindDataSemantic::VoxelCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3u8 || blindType == GridType::Vec3u16; + } else { + test = blindSemantics != GridBlindDataSemantic::PointId; + } + break; + case GridBlindDataClass::GridName: + test = blindSemantics == GridBlindDataSemantic::Unknown && blindType == GridType::Unknown; + break; + default: // captures blindClass == Unknown and ChannelArray + test = blindClass < GridBlindDataClass::End && + blindSemantics < GridBlindDataSemantic::End && + blindType < GridType::End; // any valid combination + break; + } + //if (!test) printf("Invalid combination: GridBlindDataClass=%u, GridBlindDataSemantic=%u, GridType=%u\n",(uint32_t)blindClass, 
(uint32_t)blindSemantics, (uint32_t)blindType); + return test; } // ----------------------------> Version class <------------------------------------- @@ -646,42 +924,67 @@ __hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) /// @details major is the top 11 bits, minor is the 11 middle bits and patch is the lower 10 bits class Version { - uint32_t mData;// 11 + 11 + 10 bit packing of major + minor + patch + uint32_t mData; // 11 + 11 + 10 bit packing of major + minor + patch public: - __hostdev__ Version() : mData( uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | - uint32_t(NANOVDB_MINOR_VERSION_NUMBER) << 10 | - uint32_t(NANOVDB_PATCH_VERSION_NUMBER) ) + __hostdev__ Version() + : mData(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | + uint32_t(NANOVDB_MINOR_VERSION_NUMBER) << 10 | + uint32_t(NANOVDB_PATCH_VERSION_NUMBER)) { } __hostdev__ Version(uint32_t major, uint32_t minor, uint32_t patch) - : mData( major << 21 | minor << 10 | patch ) - { - NANOVDB_ASSERT(major < (1u << 11));// max value of major is 2047 - NANOVDB_ASSERT(minor < (1u << 11));// max value of minor is 2047 - NANOVDB_ASSERT(patch < (1u << 10));// max value of patch is 1023 - } - __hostdev__ bool operator==(const Version &rhs) const {return mData == rhs.mData;} - __hostdev__ bool operator< (const Version &rhs) const {return mData < rhs.mData;} - __hostdev__ bool operator<=(const Version &rhs) const {return mData <= rhs.mData;} - __hostdev__ bool operator> (const Version &rhs) const {return mData > rhs.mData;} - __hostdev__ bool operator>=(const Version &rhs) const {return mData >= rhs.mData;} - __hostdev__ uint32_t id() const { return mData; } - __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1);} - __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1);} - __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1);} + : mData(major << 21 | minor << 10 | patch) + { + NANOVDB_ASSERT(major < (1u << 11)); // max value 
of major is 2047 + NANOVDB_ASSERT(minor < (1u << 11)); // max value of minor is 2047 + NANOVDB_ASSERT(patch < (1u << 10)); // max value of patch is 1023 + } + __hostdev__ bool operator==(const Version& rhs) const { return mData == rhs.mData; } + __hostdev__ bool operator<(const Version& rhs) const { return mData < rhs.mData; } + __hostdev__ bool operator<=(const Version& rhs) const { return mData <= rhs.mData; } + __hostdev__ bool operator>(const Version& rhs) const { return mData > rhs.mData; } + __hostdev__ bool operator>=(const Version& rhs) const { return mData >= rhs.mData; } + __hostdev__ uint32_t id() const { return mData; } + __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1); } + __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1); } + __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1); } #ifndef __CUDACC_RTC__ const char* c_str() const { - char *buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1);// xxxx.xxxx.xxxx\0 + char* buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1); // xxxx.xxxx.xxxx\0 snprintf(buffer, 4 + 1 + 4 + 1 + 4 + 1, "%d.%d.%d", this->getMajor(), this->getMinor(), this->getPatch()); // Prevents overflows by enforcing a fixed size of buffer return buffer; } #endif -};// Version +}; // Version // ----------------------------> Various math functions <------------------------------------- +//@{ +/// @brief Pi constant taken from Boost to match old behaviour +template +inline __hostdev__ constexpr T pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr float pi() +{ + return 3.141592653589793238462643383279502884e+00F; +} +template<> +inline __hostdev__ constexpr double pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr long double pi() +{ + return 3.141592653589793238462643383279502884e+00L; +} +//@} + //@{ /// Tolerance for floating-point comparison template @@ -727,7 
+1030,7 @@ struct Maximum template<> struct Maximum { - __hostdev__ static uint32_t value() { return 4294967295; } + __hostdev__ static uint32_t value() { return 4294967295u; } }; template<> struct Maximum @@ -859,7 +1162,7 @@ __hostdev__ inline T Abs(T x) template<> __hostdev__ inline float Abs(float x) { - return fabs(x); + return fabsf(x); } template<> @@ -910,8 +1213,11 @@ __hostdev__ inline double Sqrt(double x) //@} /// Return the sign of the given value as an integer (either -1, 0 or 1). -template -__hostdev__ inline T Sign(const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); } +template +__hostdev__ inline T Sign(const T& x) +{ + return ((T(0) < x) ? T(1) : T(0)) - ((x < T(0)) ? T(1) : T(0)); +} template __hostdev__ inline int MinIndex(const Vec3T& v) @@ -960,7 +1266,8 @@ __hostdev__ inline uint64_t AlignUp(uint64_t byteCount) // ------------------------------> Coord <-------------------------------------- // forward declaration so we can define Coord::asVec3s and Coord::asVec3d -template class Vec3; +template +class Vec3; /// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord class Coord @@ -988,7 +1295,7 @@ class Coord { } - __hostdev__ Coord(ValueType *ptr) + __hostdev__ Coord(ValueType* ptr) : mVec{ptr[0], ptr[1], ptr[2]} { } @@ -1015,9 +1322,9 @@ class Coord /// @warning The argument is assumed to be 0, 1, or 2. __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } - /// @brief Assignment operator that works with openvdb::Coord - template - __hostdev__ Coord& operator=(const CoordT &other) + /// @brief Assignment operator that works with openvdb::Coord + template + __hostdev__ Coord& operator=(const CoordT& other) { static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); mVec[0] = other[0]; @@ -1038,12 +1345,16 @@ class Coord /// @brief Return true if this Coord is lexicographically less than the given Coord. 
__hostdev__ bool operator<(const Coord& rhs) const { - return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false : mVec[2] < rhs[2] ? true : false; + return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false + : mVec[2] < rhs[2] ? true + : false; } // @brief Return true if the Coord components are identical. - __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } - __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } + __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } + __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } __hostdev__ Coord& operator&=(int n) { mVec[0] &= n; @@ -1072,8 +1383,9 @@ class Coord mVec[2] += n; return *this; } - __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } - __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } + __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } + __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } + __hostdev__ Coord operator-() const { return Coord(-mVec[0], -mVec[1], -mVec[2]); } __hostdev__ Coord& operator+=(const Coord& rhs) { mVec[0] += rhs[0]; @@ -1112,6 +1424,22 @@ class Coord mVec[2] = other[2]; return *this; } +#if defined(__CUDACC__) // the following functions only run on the GPU! 
+ __device__ inline Coord& minComponentAtomic(const Coord& other) + { + atomicMin(&mVec[0], other[0]); + atomicMin(&mVec[1], other[1]); + atomicMin(&mVec[2], other[2]); + return *this; + } + __device__ inline Coord& maxComponentAtomic(const Coord& other) + { + atomicMax(&mVec[0], other[0]); + atomicMax(&mVec[1], other[1]); + atomicMax(&mVec[2], other[2]); + return *this; + } +#endif __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const { @@ -1133,23 +1461,27 @@ class Coord __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); } /// @brief Return a hash key derived from the existing coordinates. - /// @details For details on this hash function please see the VDB paper. - /// The prime numbers are modified based on the ACM Transactions on Graphics paper: - /// "Real-time 3D reconstruction at scale using voxel hashing" + /// @details The hash function is originally taken from the SIGGRAPH paper: + /// "VDB: High-resolution sparse volumes with dynamic topology" + /// and the prime numbers are modified based on the ACM Transactions on Graphics paper: + /// "Real-time 3D reconstruction at scale using voxel hashing" (the second number had a typo!) 
template __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349669 ^ mVec[2] * 83492791); } /// @brief Return the octant of this Coord //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); } - __hostdev__ uint8_t octant() const { return uint8_t((uint8_t(bool(mVec[0] & (1u << 31)))) | + __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) | (uint8_t(bool(mVec[1] & (1u << 31))) << 1) | - (uint8_t(bool(mVec[2] & (1u << 31))) << 2)); } + (uint8_t(bool(mVec[2] & (1u << 31))) << 2); } /// @brief Return a single precision floating-point vector of this coordinate __hostdev__ inline Vec3 asVec3s() const; /// @brief Return a double precision floating-point vector of this coordinate __hostdev__ inline Vec3 asVec3d() const; + + // returns a copy of itself, so it minics the behaviour of Vec3::round() + __hostdev__ inline Coord round() const { return *this; } }; // Coord class // ----------------------------> Vec3 <-------------------------------------- @@ -1162,6 +1494,7 @@ class Vec3 public: static const int SIZE = 3; + static const int size = 3; // in openvdb::math::Tuple using ValueType = T; Vec3() = default; __hostdev__ explicit Vec3(T x) @@ -1172,6 +1505,12 @@ class Vec3 : mVec{x, y, z} { } + template class Vec3T, class T2> + __hostdev__ Vec3(const Vec3T& v) + : mVec{T(v[0]), T(v[1]), T(v[2])} + { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); + } template __hostdev__ explicit Vec3(const Vec3& v) : mVec{T(v[0]), T(v[1]), T(v[2])} @@ -1183,16 +1522,17 @@ class Vec3 } __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } - template - __hostdev__ Vec3& operator=(const Vec3T& rhs) + template class Vec3T, class T2> + 
__hostdev__ Vec3& operator=(const Vec3T& rhs) { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); mVec[0] = rhs[0]; mVec[1] = rhs[1]; mVec[2] = rhs[2]; return *this; } __hostdev__ const T& operator[](int i) const { return mVec[i]; } - __hostdev__ T& operator[](int i) { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } template __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; } template @@ -1206,14 +1546,16 @@ class Vec3 { return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } - __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } - __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } - __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } - __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } - __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } - __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } + __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } + __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } + __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } + __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } 
+ __hostdev__ Vec3 operator+(const Coord& ijk) const { return Vec3(mVec[0] + ijk[0], mVec[1] + ijk[1], mVec[2] + ijk[2]); } + __hostdev__ Vec3 operator-(const Coord& ijk) const { return Vec3(mVec[0] - ijk[0], mVec[1] - ijk[1], mVec[2] - ijk[2]); } + __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } + __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } __hostdev__ Vec3& operator+=(const Vec3& v) { mVec[0] += v[0]; @@ -1221,6 +1563,13 @@ class Vec3 mVec[2] += v[2]; return *this; } + __hostdev__ Vec3& operator+=(const Coord& ijk) + { + mVec[0] += T(ijk[0]); + mVec[1] += T(ijk[1]); + mVec[2] += T(ijk[2]); + return *this; + } __hostdev__ Vec3& operator-=(const Vec3& v) { mVec[0] -= v[0]; @@ -1228,6 +1577,13 @@ class Vec3 mVec[2] -= v[2]; return *this; } + __hostdev__ Vec3& operator-=(const Coord& ijk) + { + mVec[0] -= T(ijk[0]); + mVec[1] -= T(ijk[1]); + mVec[2] -= T(ijk[2]); + return *this; + } __hostdev__ Vec3& operator*=(const T& s) { mVec[0] *= s; @@ -1270,9 +1626,29 @@ class Vec3 { return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? 
mVec[1] : mVec[2]); } + /// @brief Round each component if this Vec up to its integer value + /// @return Return an integer Coord __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); } + /// @brief Round each component if this Vec down to its integer value + /// @return Return an integer Coord __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); } - __hostdev__ Coord round() const { return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5)); } + /// @brief Round each component if this Vec to its closest integer value + /// @return Return an integer Coord + __hostdev__ Coord round() const + { + if constexpr(is_same::value) { + return Coord(Floor(mVec[0] + 0.5f), Floor(mVec[1] + 0.5f), Floor(mVec[2] + 0.5f)); + } else if constexpr(is_same::value) { + return Coord(mVec[0], mVec[1], mVec[2]); + } else { + return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5)); + } + } + + /// @brief return a non-const raw constant pointer to array of three vector components + __hostdev__ T* asPointer() { return mVec; } + /// @brief return a const raw constant pointer to array of three vector components + __hostdev__ const T* asPointer() const { return mVec; } }; // Vec3 template @@ -1286,16 +1662,25 @@ __hostdev__ inline Vec3 operator/(T1 scalar, const Vec3& vec) return Vec3(scalar / vec[0], scalar / vec[1], scalar / vec[2]); } -using Vec3R = Vec3; +//using Vec3R = Vec3;// deprecated using Vec3d = Vec3; using Vec3f = Vec3; -using Vec3i = Vec3; +using Vec3i = Vec3; +using Vec3u = Vec3; +using Vec3u8 = Vec3; +using Vec3u16 = Vec3; /// @brief Return a single precision floating-point vector of this coordinate -__hostdev__ inline Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); } +__hostdev__ inline Vec3f Coord::asVec3s() const +{ + return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); +} /// @brief Return a double 
precision floating-point vector of this coordinate -__hostdev__ inline Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); } +__hostdev__ inline Vec3d Coord::asVec3d() const +{ + return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); +} // ----------------------------> Vec4 <-------------------------------------- @@ -1307,6 +1692,7 @@ class Vec4 public: static const int SIZE = 4; + static const int size = 4; using ValueType = T; Vec4() = default; __hostdev__ explicit Vec4(T x) @@ -1322,33 +1708,41 @@ class Vec4 : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} { } + template class Vec4T, class T2> + __hostdev__ Vec4(const Vec4T& v) + : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} + { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); + } __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; } __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; } - template - __hostdev__ Vec4& operator=(const Vec4T& rhs) + template class Vec4T, class T2> + __hostdev__ Vec4& operator=(const Vec4T& rhs) { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); mVec[0] = rhs[0]; mVec[1] = rhs[1]; mVec[2] = rhs[2]; mVec[3] = rhs[3]; return *this; } + __hostdev__ const T& operator[](int i) const { return mVec[i]; } - __hostdev__ T& operator[](int i) { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } template __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; } __hostdev__ T lengthSqr() const { return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } - 
__hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } - __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } - __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } - __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } - __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } - __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } + __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } + __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } + __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } + __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } + __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } + __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } __hostdev__ Vec4& operator+=(const Vec4& v) { mVec[0] += v[0]; @@ -1422,9 +1816,7 @@ using Vec4i = Vec4; // ----------------------------> TensorTraits <-------------------------------------- -template::value || - is_specialization::value || - is_same::value) ? 1 : 0> +template::value || is_specialization::value || is_same::value) ? 
1 : 0> struct TensorTraits; template @@ -1470,65 +1862,125 @@ struct FloatTraits }; template<> -struct FloatTraits// size of empty class in C++ is 1 byte and not 0 byte +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { using FloatType = uint64_t; }; template<> -struct FloatTraits// size of empty class in C++ is 1 byte and not 0 byte +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { using FloatType = bool; }; -// ----------------------------> mapping ValueType -> GridType <-------------------------------------- +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = double; +}; + +// ----------------------------> mapping BuildType -> GridType <-------------------------------------- -/// @brief Maps from a templated value type to a GridType enum +/// @brief Maps from a templated build type to a GridType enum template __hostdev__ inline GridType mapToGridType() { - if (is_same::value) { // resolved at compile-time + if constexpr(is_same::value) { // resolved at compile-time return GridType::Float; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Double; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Int16; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Int32; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Int64; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Vec3f; - } else if 
(is_same::value) { + } else if constexpr(is_same::value) { return GridType::Vec3d; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::UInt32; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Mask; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Index; - } else if (is_same::value) { + } else if constexpr(is_same::value) { + return GridType::OnIndex; + } else if constexpr(is_same::value) { + return GridType::IndexMask; + } else if constexpr(is_same::value) { + return GridType::OnIndexMask; + } else if constexpr(is_same::value) { return GridType::Boolean; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::RGBA8; } else if (is_same::value) { return GridType::Fp4; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Fp8; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Fp16; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::FpN; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Vec4f; - } else if (is_same::value) { + } else if constexpr(is_same::value) { return GridType::Vec4d; + } else if (is_same::value) { + return GridType::PointIndex; + } else if constexpr(is_same::value) { + return GridType::Vec3u8; + } else if constexpr(is_same::value) { + return GridType::Vec3u16; } return GridType::Unknown; } +// ----------------------------> mapping BuildType -> GridClass <-------------------------------------- + +/// @brief Maps from a templated build type to a GridClass enum +template +__hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass::Unknown) +{ + if (is_same::value) { + return GridClass::Topology; + } else if (BuildTraits::is_index) { + return GridClass::IndexGrid; + } else if (is_same::value) { + return GridClass::VoxelVolume; + } else if 
(is_same::value) { + return GridClass::PointIndex; + } + return defaultClass; +} + // ----------------------------> matMult <-------------------------------------- +/// @brief Multiply a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. mat x xyz template __hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) { @@ -1537,6 +1989,12 @@ __hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops } +/// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. mat x xyz template __hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) { @@ -1545,6 +2003,13 @@ __hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops } +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. 
scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. (mat x xyz) + vec template __hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz) { @@ -1553,6 +2018,13 @@ __hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops } +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. (mat x xyz) + vec template __hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz) { @@ -1561,8 +2033,12 @@ __hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops } -// matMultT: Multiply with the transpose: - +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. 
+/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. mat^T x xyz template __hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) { @@ -1571,6 +2047,12 @@ __hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops } +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. 
mat^T x xyz template __hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) { @@ -1601,13 +2083,13 @@ __hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Ve template struct BaseBBox { - Vec3T mCoord[2]; - __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; - __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; + Vec3T mCoord[2]; + __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; + __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; } - __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } - __hostdev__ Vec3T& min() { return mCoord[0]; } - __hostdev__ Vec3T& max() { return mCoord[1]; } + __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } + __hostdev__ Vec3T& min() { return mCoord[0]; } + __hostdev__ Vec3T& max() { return mCoord[1]; } __hostdev__ const Vec3T& min() const { return mCoord[0]; } __hostdev__ const Vec3T& max() const { return mCoord[1]; } __hostdev__ Coord& translate(const Vec3T& xyz) @@ -1616,7 +2098,7 @@ struct BaseBBox mCoord[1] += xyz; return *this; } - // @brief Expand this bounding box to enclose point (i, j, k). + /// @brief Expand this bounding box to enclose point @c xyz. __hostdev__ BaseBBox& expand(const Vec3T& xyz) { mCoord[0].minComponent(xyz); @@ -1624,11 +2106,19 @@ struct BaseBBox return *this; } + /// @brief Expand this bounding box to enclode the given bounding box. + __hostdev__ BaseBBox& expand(const BaseBBox& bbox) + { + mCoord[0].minComponent(bbox[0]); + mCoord[1].maxComponent(bbox[1]); + return *this; + } + /// @brief Intersect this bounding box with the given bounding box. 
__hostdev__ BaseBBox& intersect(const BaseBBox& bbox) { - mCoord[0].maxComponent(bbox.min()); - mCoord[1].minComponent(bbox.max()); + mCoord[0].maxComponent(bbox[0]); + mCoord[1].minComponent(bbox[1]); return *this; } @@ -1669,7 +2159,7 @@ struct BBox : public BaseBBox using BaseT = BaseBBox; using BaseT::mCoord; __hostdev__ BBox() - : BaseT(Vec3T( Maximum::value()), + : BaseT(Vec3T(Maximum::value()), Vec3T(-Maximum::value())) { } @@ -1682,12 +2172,15 @@ struct BBox : public BaseBBox Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1))) { } - __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim) + __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim) { return BBox(min, min.offsetBy(dim)); } - __hostdev__ BBox(const BaseBBox& bbox) : BBox(bbox[0], bbox[1]) {} + __hostdev__ BBox(const BaseBBox& bbox) + : BBox(bbox[0], bbox[1]) + { + } __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || mCoord[0][1] >= mCoord[1][1] || mCoord[0][2] >= mCoord[1][2]; } @@ -1698,7 +2191,7 @@ struct BBox : public BaseBBox p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2]; } -};// BBox +}; // BBox /// @brief Partial template specialization for integer coordinate types /// @@ -1716,6 +2209,7 @@ struct BBox : public BaseBBox { const BBox& mBBox; CoordT mPos; + public: __hostdev__ Iterator(const BBox& b) : mBBox(b) @@ -1724,7 +2218,7 @@ struct BBox : public BaseBBox } __hostdev__ Iterator& operator++() { - if (mPos[2] < mBBox[1][2]) {// this is the most common case + if (mPos[2] < mBBox[1][2]) { // this is the most common case ++mPos[2]; } else if (mPos[1] < mBBox[1][1]) { mPos[2] = mBBox[0][2]; @@ -1743,7 +2237,7 @@ struct BBox : public BaseBBox return tmp; } /// @brief Return @c true if the iterator still points to a valid coordinate. 
- __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } + __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } __hostdev__ const CoordT& operator*() const { return mPos; } }; // Iterator __hostdev__ Iterator begin() const { return Iterator{*this}; } @@ -1766,23 +2260,32 @@ struct BBox : public BaseBBox other.mCoord[0][n] = mCoord[1][n] + 1; } - __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim) + __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim) { return BBox(min, min.offsetBy(dim - 1)); } + __hostdev__ static BBox createCube(typename CoordT::ValueType min, typename CoordT::ValueType max) + { + return BBox(CoordT(min), CoordT(max)); + } + __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && mCoord[0][1] < mCoord[1][1] && mCoord[0][2] < mCoord[1][2]; } /// @brief Return true if this bounding box is empty, i.e. uninitialized - __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || - mCoord[0][1] > mCoord[1][1] || - mCoord[0][2] > mCoord[1][2]; } - __hostdev__ CoordT dim() const { return this->empty() ? Coord(0) : this->max() - this->min() + Coord(1); } - __hostdev__ uint64_t volume() const { auto d = this->dim(); return uint64_t(d[0])*uint64_t(d[1])*uint64_t(d[2]); } - __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } + __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || + mCoord[0][1] > mCoord[1][1] || + mCoord[0][2] > mCoord[1][2]; } + __hostdev__ CoordT dim() const { return this->empty() ? 
Coord(0) : this->max() - this->min() + Coord(1); } + __hostdev__ uint64_t volume() const + { + auto d = this->dim(); + return uint64_t(d[0]) * uint64_t(d[1]) * uint64_t(d[2]); + } + __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } /// @brief Return @c true if the given bounding box is inside this bounding box. - __hostdev__ bool isInside(const BBox& b) const + __hostdev__ bool isInside(const BBox& b) const { return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max())); } @@ -1806,10 +2309,49 @@ struct BBox : public BaseBBox { return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding)); } -};// BBox + + /// @brief @brief transform this coordinate bounding box by the specified map + /// @param map mapping of index to world coordinates + /// @return world bounding box + template + __hostdev__ BBox transform(const Map& map) const + { + const Vec3d tmp = map.applyMap(Vec3d(mCoord[0][0], mCoord[0][1], mCoord[0][2])); + BBox bbox(tmp, tmp); + bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[0][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[0][0], mCoord[1][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3d(mCoord[1][0], mCoord[1][1], mCoord[1][2]))); + return bbox; + } + +#if defined(__CUDACC__) // the following functions only run on the GPU! 
+ __device__ inline BBox& expandAtomic(const CoordT& ijk) + { + mCoord[0].minComponentAtomic(ijk); + mCoord[1].maxComponentAtomic(ijk); + return *this; + } + __device__ inline BBox& expandAtomic(const BBox& bbox) + { + mCoord[0].minComponentAtomic(bbox[0]); + mCoord[1].maxComponentAtomic(bbox[1]); + return *this; + } + __device__ inline BBox& intersectAtomic(const BBox& bbox) + { + mCoord[0].maxComponentAtomic(bbox[0]); + mCoord[1].minComponentAtomic(bbox[1]); + return *this; + } +#endif +}; // BBox using CoordBBox = BBox; -using BBoxR = BBox; +using BBoxR = BBox; // -------------------> Find lowest and highest bit in a word <---------------------------- @@ -1821,7 +2363,7 @@ __hostdev__ static inline uint32_t FindLowestOn(uint32_t v) { NANOVDB_ASSERT(v); #if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return __ffs(v); + return __ffs(v) - 1; // one based indexing #elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) unsigned long index; _BitScanForward(&index, v); @@ -1829,7 +2371,7 @@ __hostdev__ static inline uint32_t FindLowestOn(uint32_t v) #elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) return static_cast(__builtin_ctzl(v)); #else -//#warning Using software implementation for FindLowestOn(uint32_t) + //NANO_WARNING("Using software implementation for FindLowestOn(uint32_t v)") static const unsigned char DeBruijn[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; // disable unary minus on unsigned warning @@ -1852,16 +2394,19 @@ NANOVDB_HOSTDEV_DISABLE_WARNING __hostdev__ static inline uint32_t FindHighestOn(uint32_t v) { NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(uint32_t) * 8 - 1 - __clz(v); // Return the number of consecutive high-order zero bits in a 32-bit integer. 
+#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) unsigned long index; _BitScanReverse(&index, v); return static_cast(index); #elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); #else -//#warning Using software implementation for FindHighestOn(uint32_t) + //NANO_WARNING("Using software implementation for FindHighestOn(uint32_t)") static const unsigned char DeBruijn[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; v |= v >> 1; // first round down to one less than a power of 2 v |= v >> 2; v |= v >> 4; @@ -1879,7 +2424,7 @@ __hostdev__ static inline uint32_t FindLowestOn(uint64_t v) { NANOVDB_ASSERT(v); #if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) - return __ffsll(static_cast(v)); + return __ffsll(static_cast(v)) - 1; // one based indexing #elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) unsigned long index; _BitScanForward64(&index, v); @@ -1887,7 +2432,7 @@ __hostdev__ static inline uint32_t FindLowestOn(uint64_t v) #elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) return static_cast(__builtin_ctzll(v)); #else -//#warning Using software implementation for FindLowestOn(uint64_t) + //NANO_WARNING("Using software implementation for FindLowestOn(uint64_t)") static const unsigned char DeBruijn[64] = { 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, @@ -1914,7 +2459,9 @@ NANOVDB_HOSTDEV_DISABLE_WARNING __hostdev__ static inline uint32_t FindHighestOn(uint64_t v) { NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + 
return sizeof(unsigned long) * 8 - 1 - __clzll(static_cast(v)); +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) unsigned long index; _BitScanReverse64(&index, v); return static_cast(index); @@ -1933,39 +2480,175 @@ NANOVDB_HOSTDEV_DISABLE_WARNING __hostdev__ inline uint32_t CountOn(uint64_t v) { #if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) -//#warning Using popcll for CountOn + //#warning Using popcll for CountOn return __popcll(v); // __popcnt64 intrinsic support was added in VS 2019 16.8 #elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) -//#warning Using popcnt64 for CountOn + //#warning Using popcnt64 for CountOn return __popcnt64(v); #elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) -//#warning Using builtin_popcountll for CountOn + //#warning Using builtin_popcountll for CountOn return __builtin_popcountll(v); -#else// use software implementation -//#warning Using software implementation for CountOn +#else // use software implementation + //NANO_WARNING("Using software implementation for CountOn") v = v - ((v >> 1) & uint64_t(0x5555555555555555)); v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); return (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; #endif } -// ----------------------------> Mask <-------------------------------------- +// ----------------------------> BitFlags <-------------------------------------- -/// @brief Bit-mask to encode active states and facilitate sequential iterators -/// and a fast codec for I/O compression. 
-template -class Mask +template +struct BitArray; +template<> +struct BitArray<8> { - static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask - static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words - uint64_t mWords[WORD_COUNT]; + uint8_t mFlags{0}; +}; +template<> +struct BitArray<16> +{ + uint16_t mFlags{0}; +}; +template<> +struct BitArray<32> +{ + uint32_t mFlags{0}; +}; +template<> +struct BitArray<64> +{ + uint64_t mFlags{0}; +}; -public: - /// @brief Return the memory footprint in bytes of this Mask - __hostdev__ static size_t memUsage() { return sizeof(Mask); } +template +class BitFlags : public BitArray +{ +protected: + using BitArray::mFlags; - /// @brief Return the number of bits available in this Mask +public: + using Type = decltype(mFlags); + BitFlags() {} + BitFlags(std::initializer_list list) + { + for (auto bit : list) + mFlags |= static_cast(1 << bit); + } + template + BitFlags(std::initializer_list list) + { + for (auto mask : list) + mFlags |= static_cast(mask); + } + __hostdev__ Type data() const { return mFlags; } + __hostdev__ Type& data() { return mFlags; } + __hostdev__ void initBit(std::initializer_list list) + { + mFlags = 0u; + for (auto bit : list) + mFlags |= static_cast(1 << bit); + } + template + __hostdev__ void initMask(std::initializer_list list) + { + mFlags = 0u; + for (auto mask : list) + mFlags |= static_cast(mask); + } + //__hostdev__ Type& data() { return mFlags; } + //__hostdev__ Type data() const { return mFlags; } + __hostdev__ Type getFlags() const { return mFlags & (static_cast(GridFlags::End) - 1u); } // mask out everything except relevant bits + + __hostdev__ void setOn() { mFlags = ~Type(0u); } + __hostdev__ void setOff() { mFlags = Type(0u); } + + __hostdev__ void setBitOn(uint8_t bit) { mFlags |= static_cast(1 << bit); } + __hostdev__ void setBitOff(uint8_t bit) { mFlags &= ~static_cast(1 << bit); } + + __hostdev__ void setBitOn(std::initializer_list list) + { + for 
(auto bit : list) + mFlags |= static_cast(1 << bit); + } + __hostdev__ void setBitOff(std::initializer_list list) + { + for (auto bit : list) + mFlags &= ~static_cast(1 << bit); + } + + template + __hostdev__ void setMaskOn(MaskT mask) { mFlags |= static_cast(mask); } + template + __hostdev__ void setMaskOff(MaskT mask) { mFlags &= ~static_cast(mask); } + + template + __hostdev__ void setMaskOn(std::initializer_list list) + { + for (auto mask : list) + mFlags |= static_cast(mask); + } + template + __hostdev__ void setMaskOff(std::initializer_list list) + { + for (auto mask : list) + mFlags &= ~static_cast(mask); + } + + __hostdev__ void setBit(uint8_t bit, bool on) { on ? this->setBitOn(bit) : this->setBitOff(bit); } + template + __hostdev__ void setMask(MaskT mask, bool on) { on ? this->setMaskOn(mask) : this->setMaskOff(mask); } + + __hostdev__ bool isOn() const { return mFlags == ~Type(0u); } + __hostdev__ bool isOff() const { return mFlags == Type(0u); } + __hostdev__ bool isBitOn(uint8_t bit) const { return 0 != (mFlags & static_cast(1 << bit)); } + __hostdev__ bool isBitOff(uint8_t bit) const { return 0 == (mFlags & static_cast(1 << bit)); } + template + __hostdev__ bool isMaskOn(MaskT mask) const { return 0 != (mFlags & static_cast(mask)); } + template + __hostdev__ bool isMaskOff(MaskT mask) const { return 0 == (mFlags & static_cast(mask)); } + /// @brief return true if any of the masks in the list are on + template + __hostdev__ bool isMaskOn(std::initializer_list list) const + { + for (auto mask : list) + if (0 != (mFlags & static_cast(mask))) + return true; + return false; + } + /// @brief return true if any of the masks in the list are off + template + __hostdev__ bool isMaskOff(std::initializer_list list) const + { + for (auto mask : list) + if (0 == (mFlags & static_cast(mask))) + return true; + return false; + } + /// @brief required for backwards compatibility + __hostdev__ BitFlags& operator=(Type n) + { + mFlags = n; + return *this; + } +}; // 
BitFlags + +// ----------------------------> Mask <-------------------------------------- + +/// @brief Bit-mask to encode active states and facilitate sequential iterators +/// and a fast codec for I/O compression. +template +class Mask +{ +public: + static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask + static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words + + /// @brief Return the memory footprint in bytes of this Mask + __hostdev__ static size_t memUsage() { return sizeof(Mask); } + + /// @brief Return the number of bits available in this Mask __hostdev__ static uint32_t bitCount() { return SIZE; } /// @brief Return the number of machine words used by this Mask @@ -1974,8 +2657,8 @@ class Mask /// @brief Return the total number of set bits in this Mask __hostdev__ uint32_t countOn() const { - uint32_t sum = 0, n = WORD_COUNT; - for (const uint64_t* w = mWords; n--; ++w) + uint32_t sum = 0; + for (const uint64_t *w = mWords, *q = w + WORD_COUNT; w != q; ++w) sum += CountOn(*w); return sum; } @@ -1983,21 +2666,30 @@ class Mask /// @brief Return the number of lower set bits in mask up to but excluding the i'th bit inline __hostdev__ uint32_t countOn(uint32_t i) const { - uint32_t n = i >> 6, sum = CountOn( mWords[n] & ((uint64_t(1) << (i & 63u))-1u) ); - for (const uint64_t* w = mWords; n--; ++w) sum += CountOn(*w); + uint32_t n = i >> 6, sum = CountOn(mWords[n] & ((uint64_t(1) << (i & 63u)) - 1u)); + for (const uint64_t* w = mWords; n--; ++w) + sum += CountOn(*w); return sum; } - template + template class Iterator { public: - __hostdev__ Iterator() : mPos(Mask::SIZE), mParent(nullptr){} - __hostdev__ Iterator(uint32_t pos, const Mask* parent) : mPos(pos), mParent(parent){} - Iterator& operator=(const Iterator&) = default; + __hostdev__ Iterator() + : mPos(Mask::SIZE) + , mParent(nullptr) + { + } + __hostdev__ Iterator(uint32_t pos, const Mask* parent) + : mPos(pos) + , mParent(parent) + { + } + Iterator& 
operator=(const Iterator&) = default; __hostdev__ uint32_t operator*() const { return mPos; } __hostdev__ uint32_t pos() const { return mPos; } - __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } __hostdev__ Iterator& operator++() { mPos = mParent->findNext(mPos + 1); @@ -2015,6 +2707,33 @@ class Mask const Mask* mParent; }; // Member class Iterator + class DenseIterator + { + public: + __hostdev__ DenseIterator(uint32_t pos = Mask::SIZE) + : mPos(pos) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ uint32_t operator*() const { return mPos; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ DenseIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ DenseIterator operator++(int) + { + auto tmp = *this; + ++mPos; + return tmp; + } + + private: + uint32_t mPos; + }; // Member class DenseIterator + using OnIterator = Iterator; using OffIterator = Iterator; @@ -2022,6 +2741,8 @@ class Mask __hostdev__ OffIterator beginOff() const { return OffIterator(this->findFirst(), this); } + __hostdev__ DenseIterator beginAll() const { return DenseIterator(0); } + /// @brief Initialize all bits to zero. __hostdev__ Mask() { @@ -2042,41 +2763,35 @@ class Mask mWords[i] = other.mWords[i]; } - /// @brief Return a const reference to the nth word of the bit mask, for a word of arbitrary size. - template - __hostdev__ const WordT& getWord(int n) const - { - NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE); - return reinterpret_cast(mWords)[n]; - } - - /// @brief Return a reference to the nth word of the bit mask, for a word of arbitrary size. 
- template - __hostdev__ WordT& getWord(int n) - { - NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE); - return reinterpret_cast(mWords)[n]; - } + /// @brief Return a pointer to the list of words of the bit mask + __hostdev__ uint64_t* words() { return mWords; } + __hostdev__ const uint64_t* words() const { return mWords; } /// @brief Assignment operator that works with openvdb::util::NodeMask - template - __hostdev__ Mask& operator=(const MaskT& other) + template + __hostdev__ typename enable_if::value, Mask&>::type operator=(const MaskT& other) { static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof"); static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count"); static_assert(LOG2DIM == MaskT::LOG2DIM, "Mismatching LOG2DIM"); - auto *src = reinterpret_cast(&other); - uint64_t *dst = mWords; - for (uint32_t i = 0; i < WORD_COUNT; ++i) { - *dst++ = *src++; - } + auto* src = reinterpret_cast(&other); + for (uint64_t *dst = mWords, *end = dst + WORD_COUNT; dst != end; ++dst) + *dst = *src++; + return *this; + } + + __hostdev__ Mask& operator=(const Mask& other) + { + for (uint32_t i = 0; i < WORD_COUNT; ++i) + mWords[i] = other.mWords[i]; return *this; } __hostdev__ bool operator==(const Mask& other) const { for (uint32_t i = 0; i < WORD_COUNT; ++i) { - if (mWords[i] != other.mWords[i]) return false; + if (mWords[i] != other.mWords[i]) + return false; } return true; } @@ -2109,20 +2824,33 @@ class Mask /// @brief Set the specified bit on. __hostdev__ void setOn(uint32_t n) { mWords[n >> 6] |= uint64_t(1) << (n & 63); } - /// @brief Set the specified bit off. __hostdev__ void setOff(uint32_t n) { mWords[n >> 6] &= ~(uint64_t(1) << (n & 63)); } +#if defined(__CUDACC__) // the following functions only run on the GPU! 
+ __device__ inline void setOnAtomic(uint32_t n) + { + atomicOr(reinterpret_cast(this) + (n >> 6), 1ull << (n & 63)); + } + __device__ inline void setOffAtomic(uint32_t n) + { + atomicAnd(reinterpret_cast(this) + (n >> 6), ~(1ull << (n & 63))); + } + __device__ inline void setAtomic(uint32_t n, bool on) + { + on ? this->setOnAtomic(n) : this->setOffAtomic(n); + } +#endif /// @brief Set the specified bit on or off. - __hostdev__ void set(uint32_t n, bool On) + __hostdev__ void set(uint32_t n, bool on) { -#if 1 // switch between branchless - auto &word = mWords[n >> 6]; +#if 1 // switch between branchless + auto& word = mWords[n >> 6]; n &= 63; word &= ~(uint64_t(1) << n); - word |= uint64_t(On) << n; + word |= uint64_t(on) << n; #else - On ? this->setOn(n) : this->setOff(n); + on ? this->setOn(n) : this->setOff(n); #endif } @@ -2157,73 +2885,96 @@ class Mask __hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); } /// @brief Bitwise intersection - __hostdev__ Mask& operator&=(const Mask& other) + __hostdev__ Mask& operator&=(const Mask& other) { - uint64_t *w1 = mWords; - const uint64_t *w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2; + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) + *w1 &= *w2; return *this; } /// @brief Bitwise union __hostdev__ Mask& operator|=(const Mask& other) { - uint64_t *w1 = mWords; - const uint64_t *w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2; + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) + *w1 |= *w2; return *this; } /// @brief Bitwise difference __hostdev__ Mask& operator-=(const Mask& other) { - uint64_t *w1 = mWords; - const uint64_t *w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2; + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, 
++w2) + *w1 &= ~*w2; return *this; } /// @brief Bitwise XOR __hostdev__ Mask& operator^=(const Mask& other) { - uint64_t *w1 = mWords; - const uint64_t *w2 = other.mWords; - for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2; + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) + *w1 ^= *w2; return *this; } -private: - NANOVDB_HOSTDEV_DISABLE_WARNING - template + template __hostdev__ uint32_t findFirst() const { - uint32_t n = 0; + uint32_t n = 0u; const uint64_t* w = mWords; - for (; n + template __hostdev__ uint32_t findNext(uint32_t start) const { uint32_t n = start >> 6; // initiate if (n >= WORD_COUNT) return SIZE; // check for out of bounds - uint32_t m = start & 63; - uint64_t b = On ? mWords[n] : ~mWords[n]; - if (b & (uint64_t(1) << m)) - return start; // simple case: start is on - b &= ~uint64_t(0) << m; // mask out lower bits + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) + return start; // simple case: start is on/off + b &= ~uint64_t(0u) << m; // mask out lower bits while (!b && ++n < WORD_COUNT) - b = On ? mWords[n] : ~mWords[n]; // find next non-zero word - return (!b ? SIZE : (n << 6) + FindLowestOn(b)); // catch last word=0 + b = ON ? mWords[n] : ~mWords[n]; // find next non-zero word + return b ? (n << 6) + FindLowestOn(b) : SIZE; // catch last word=0 + } + + NANOVDB_HOSTDEV_DISABLE_WARNING + template + __hostdev__ uint32_t findPrev(uint32_t start) const + { + uint32_t n = start >> 6; // initiate + if (n >= WORD_COUNT) + return SIZE; // check for out of bounds + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) + return start; // simple case: start is on/off + b &= (uint64_t(1u) << m) - 1u; // mask out higher bits + while (!b && n) + b = ON ? mWords[--n] : ~mWords[--n]; // find previous non-zero word + return b ? 
(n << 6) + FindHighestOn(b) : SIZE; // catch first word=0 } + +private: + uint64_t mWords[WORD_COUNT]; }; // Mask class // ----------------------------> Map <-------------------------------------- /// @brief Defines an affine transform and its inverse represented as a 3x3 matrix and a vec3 translation struct Map -{ +{ // 264B (not 32B aligned!) float mMatF[9]; // 9*4B <- 3x3 matrix float mInvMatF[9]; // 9*4B <- 3x3 matrix float mVecF[3]; // 3*4B <- translation @@ -2233,108 +2984,212 @@ struct Map double mVecD[3]; // 3*8B <- translation double mTaperD; // 8B, placeholder for taper value - /// @brief Initialize the member data - template - __hostdev__ void set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper); + /// @brief Default constructor for the identity map + __hostdev__ Map() + : mMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mInvMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mVecF{0.0f, 0.0f, 0.0f} + , mTaperF{1.0f} + , mMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mInvMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mVecD{0.0, 0.0, 0.0} + , mTaperD{1.0} + { + } + __hostdev__ Map(double s, const Vec3d& t = Vec3d(0.0, 0.0, 0.0)) + : mMatF{float(s), 0.0f, 0.0f, 0.0f, float(s), 0.0f, 0.0f, 0.0f, float(s)} + , mInvMatF{1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s)} + , mVecF{float(t[0]), float(t[1]), float(t[2])} + , mTaperF{1.0f} + , mMatD{s, 0.0, 0.0, 0.0, s, 0.0, 0.0, 0.0, s} + , mInvMatD{1.0 / s, 0.0, 0.0, 0.0, 1.0 / s, 0.0, 0.0, 0.0, 1.0 / s} + , mVecD{t[0], t[1], t[2]} + , mTaperD{1.0} + { + } + + /// @brief Initialize the member data from 3x3 or 4x4 matrices + /// @note This is not _hostdev__ since then MatT=openvdb::Mat4d will produce warnings + template + void set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper = 1.0); - /// @brief Initialize the member data + /// @brief Initialize the member data from 4x4 matrices 
/// @note The last (4th) row of invMat is actually ignored. + /// This is not _hostdev__ since then Mat4T=openvdb::Mat4d will produce warnings template - __hostdev__ void set(const Mat4T& mat, const Mat4T& invMat, double taper) {this->set(mat, invMat, mat[3], taper);} + void set(const Mat4T& mat, const Mat4T& invMat, double taper = 1.0) { this->set(mat, invMat, mat[3], taper); } template - __hostdev__ void set(double scale, const Vec3T &translation, double taper); + void set(double scale, const Vec3T& translation, double taper = 1.0); + /// @brief Apply the forward affine transformation to a vector using 64bit floating point arithmetics. + /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation template - __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); } - template - __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); } + __hostdev__ Vec3T applyMap(const Vec3T& ijk) const { return matMult(mMatD, mVecD, ijk); } + /// @brief Apply the forward affine transformation to a vector using 32bit floating point arithmetics. + /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation + template + __hostdev__ Vec3T applyMapF(const Vec3T& ijk) const { return matMult(mMatF, mVecF, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. 
scale and rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector template - __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return matMult(mMatD, xyz); } + __hostdev__ Vec3T applyJacobian(const Vec3T& ijk) const { return matMult(mMatD, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. scale and rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector template - __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return matMult(mMatF, xyz); } + __hostdev__ Vec3T applyJacobianF(const Vec3T& ijk) const { return matMult(mMatF, ijk); } + /// @brief Apply the inverse affine mapping to a vector using 64bit floating point arithmetics. + /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse afine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); } + + /// @brief Apply the inverse affine mapping to a vector using 32bit floating point arithmetics. 
+ /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse afine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); } + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return matMult(mInvMatD, xyz); } + + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return matMult(mInvMatF, xyz); } + /// @brief Apply the transposed inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. 
+ /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); } template __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); } + + /// @brief Return a voxels size in each coordinate direction, measured at the origin + __hostdev__ Vec3d getVoxelSize() const { return this->applyMap(Vec3d(1)) - this->applyMap(Vec3d(0)); } }; // Map -template -__hostdev__ inline void Map::set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper) +template +inline void Map::set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper) { - float *mf = mMatF, *vf = mVecF, *mif = mInvMatF; + float * mf = mMatF, *vf = mVecF, *mif = mInvMatF; double *md = mMatD, *vd = mVecD, *mid = mInvMatD; mTaperF = static_cast(taper); mTaperD = taper; for (int i = 0; i < 3; ++i) { *vd++ = translate[i]; //translation - *vf++ = static_cast(translate[i]); + *vf++ = static_cast(translate[i]); //translation for (int j = 0; j < 3; ++j) { *md++ = mat[j][i]; //transposed *mid++ = invMat[j][i]; - *mf++ = static_cast(mat[j][i]); + *mf++ = static_cast(mat[j][i]); //transposed *mif++ = static_cast(invMat[j][i]); } } } template -__hostdev__ inline void Map::set(double dx, const Vec3T &trans, double taper) -{ - const double mat[3][3] = { - {dx, 0.0, 0.0}, // row 0 - {0.0, dx, 0.0}, // row 1 - {0.0, 0.0, dx}, // row 2 - }, idx = 1.0/dx, invMat[3][3] = { - {idx, 0.0, 0.0}, // row 0 - {0.0, idx, 0.0}, // row 1 - {0.0, 0.0, idx}, // row 2 - }; +inline void Map::set(double dx, const Vec3T& trans, double taper) +{ + NANOVDB_ASSERT(dx > 0.0); + const double mat[3][3] = { {dx, 0.0, 0.0}, // row 0 + {0.0, 
dx, 0.0}, // row 1 + {0.0, 0.0, dx} }; // row 2 + const double idx = 1.0 / dx; + const double invMat[3][3] = { {idx, 0.0, 0.0}, // row 0 + {0.0, idx, 0.0}, // row 1 + {0.0, 0.0, idx} }; // row 2 this->set(mat, invMat, trans, taper); } // ----------------------------> GridBlindMetaData <-------------------------------------- struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData -{ - static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less! - int64_t mByteOffset; // byte offset to the blind data, relative to the GridData. - uint64_t mElementCount; // number of elements, e.g. point count - uint32_t mFlags; // flags +{ // 288 bytes + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less! + int64_t mDataOffset; // byte offset to the blind data, relative to this GridBlindMetaData. + uint64_t mValueCount; // number of blind values, e.g. point count + uint32_t mValueSize;// byte size of each value, e.g. 4 if mDataType=Float and 1 if mDataType=Unknown GridBlindDataSemantic mSemantic; // semantic meaning of the data. GridBlindDataClass mDataClass; // 4 bytes GridType mDataType; // 4 bytes - char mName[MaxNameSize];// note this include the NULL termination + char mName[MaxNameSize]; // note this includes the NULL termination + // no padding required for 32 byte alignment + + // disallow copy-construction since methods like blindData and getBlindData uses the this pointer! + GridBlindMetaData(const GridBlindMetaData&) = delete; - /// @brief return memory usage in bytes for the class (note this computes for all blindMetaData structures.) - __hostdev__ static uint64_t memUsage(uint64_t blindDataCount = 0) + // disallow copy-assignment since methods like blindData and getBlindData uses the this pointer! 
+ const GridBlindMetaData& operator=(const GridBlindMetaData&) = delete; + + __hostdev__ void setBlindData(void* blindData) { mDataOffset = PtrDiff(blindData, this); } + + // unsafe + __hostdev__ const void* blindData() const {return PtrAdd(this, mDataOffset);} + + /// @brief Get a const pointer to the blind data represented by this meta data + /// @tparam BlindDataT Expected value type of the blind data. + /// @return Returns NULL if mGridType!=mapToGridType(), else a const point of type BlindDataT. + /// @note Use mDataType=Unknown if BlindDataT is a custom data type unknown to NanoVDB. + template + __hostdev__ const BlindDataT* getBlindData() const { - return blindDataCount * sizeof(GridBlindMetaData); + //if (mDataType != mapToGridType()) printf("getBlindData mismatch\n"); + return mDataType == mapToGridType() ? PtrAdd(this, mDataOffset) : nullptr; } - __hostdev__ void setBlindData(void *ptr) { mByteOffset = PtrDiff(ptr, this); } - - template - __hostdev__ const T* getBlindData() const { return PtrAdd(this, mByteOffset); } + /// @brief return true if this meta data has a valid combination of semantic, class and value tags + __hostdev__ bool isValid() const { return nanovdb::isValid(mDataClass, mSemantic, mDataType); } + /// @brief return size in bytes of the blind data represented by this blind meta data + /// @note This size includes possible padding for 32 byte alignment. 
The actual amount + /// of bind data is mValueCount * mValueSize + __hostdev__ uint64_t blindDataSize() const + { + return AlignUp(mValueCount * mValueSize); + } }; // GridBlindMetaData // ----------------------------> NodeTrait <-------------------------------------- @@ -2348,14 +3203,14 @@ struct NodeTrait; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); using Type = typename GridOrTreeOrRootT::LeafNodeType; using type = typename GridOrTreeOrRootT::LeafNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); using Type = const typename GridOrTreeOrRootT::LeafNodeType; using type = const typename GridOrTreeOrRootT::LeafNodeType; }; @@ -2363,47 +3218,66 @@ struct NodeTrait template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; - using type = typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; - using type = const typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename 
GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType::ChildNodeType; - using type = typename GridOrTreeOrRootT::RootType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType::ChildNodeType; - using type = const typename GridOrTreeOrRootT::RootType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType; - using type = typename GridOrTreeOrRootT::RootType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType; - using type = const typename GridOrTreeOrRootT::RootType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType; + 
using type = const typename GridOrTreeOrRootT::RootNodeType; }; +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- + +template +struct GetValue; +template +struct SetValue; +template +struct SetVoxel; +template +struct GetState; +template +struct GetDim; +template +struct GetLeaf; +template +struct ProbeValue; +template +struct GetNodeInfo; + // ----------------------------> Grid <-------------------------------------- /* @@ -2431,77 +3305,75 @@ struct NodeTrait /// /// @note No client code should (or can) interface with this struct so it can safely be ignored! struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData -{// sizeof(GridData) = 672B - static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less +{ // sizeof(GridData) = 672B + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less uint64_t mMagic; // 8B (0) magic to validate it is valid grid data. uint64_t mChecksum; // 8B (8). Checksum of grid buffer. - Version mVersion;// 4B (16) major, minor, and patch version numbers - uint32_t mFlags; // 4B (20). flags for grid. + Version mVersion; // 4B (16) major, minor, and patch version numbers + BitFlags<32> mFlags; // 4B (20). flags for grid. uint32_t mGridIndex; // 4B (24). Index of this grid in the buffer uint32_t mGridCount; // 4B (28). Total number of grids in the buffer uint64_t mGridSize; // 8B (32). byte count of this entire grid occupied in the buffer. char mGridName[MaxNameSize]; // 256B (40) Map mMap; // 264B (296). affine transformation between index and world space in both single and double precision - BBox mWorldBBox; // 48B (560). floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) - Vec3R mVoxelSize; // 24B (608). size of a voxel in world units + BBox mWorldBBox; // 48B (560). floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3d mVoxelSize; // 24B (608). 
size of a voxel in world units GridClass mGridClass; // 4B (632). GridType mGridType; // 4B (636). - int64_t mBlindMetadataOffset; // 8B (640). offset of GridBlindMetaData structures that follow this grid. + int64_t mBlindMetadataOffset; // 8B (640). offset to beginning of GridBlindMetaData structures that follow this grid. uint32_t mBlindMetadataCount; // 4B (648). count of GridBlindMetaData structures that follow this grid. - uint32_t mData0;// 4B (652) - uint64_t mData1, mData2;// 2x8B (656) padding to 32 B alignment. mData1 is use for the total number of values indexed by an IndexGrid - - // Set and unset various bit flags - __hostdev__ void setFlagsOff() { mFlags = uint32_t(0); } - __hostdev__ void setMinMaxOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasMinMax); - } else { - mFlags &= ~static_cast(GridFlags::HasMinMax); - } - } - __hostdev__ void setBBoxOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasBBox); - } else { - mFlags &= ~static_cast(GridFlags::HasBBox); - } - } - __hostdev__ void setLongGridNameOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasLongGridName); - } else { - mFlags &= ~static_cast(GridFlags::HasLongGridName); - } - } - __hostdev__ void setAverageOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasAverage); - } else { - mFlags &= ~static_cast(GridFlags::HasAverage); - } + uint32_t mData0; // 4B (652) + uint64_t mData1, mData2; // 2x8B (656) padding to 32 B alignment. 
mData1 is use for the total number of values indexed by an IndexGrid + /// @brief Use this method to initiate most member dat + __hostdev__ GridData& operator=(const GridData& other) + { + static_assert(8 * 84 == sizeof(GridData), "GridData has unexpected size"); + auto* src = reinterpret_cast(&other); + for (auto *dst = reinterpret_cast(this), *end = dst + 84; dst != end; ++dst) + *dst = *src++; + return *this; } - __hostdev__ void setStdDeviationOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasStdDeviation); - } else { - mFlags &= ~static_cast(GridFlags::HasStdDeviation); - } + __hostdev__ void init(std::initializer_list list = {GridFlags::IsBreadthFirst}, + uint64_t gridSize = 0u, + const Map& map = Map(), + GridType gridType = GridType::Unknown, + GridClass gridClass = GridClass::Unknown) + { + mMagic = NANOVDB_MAGIC_NUMBER; + mChecksum = 0u; + mVersion = Version(); + mFlags.initMask(list); + mGridIndex = 0u; + mGridCount = 1u; + mGridSize = gridSize; + mGridName[0] = '\0'; + mMap = map; + mWorldBBox = BBox(); + mVoxelSize = map.getVoxelSize(); + mGridClass = gridClass; + mGridType = gridType; + mBlindMetadataOffset = mGridSize; // i.e. no blind data + mBlindMetadataCount = 0u; // i.e. 
no blind data + mData0 = 0u; + mData1 = 0u; // only used for index and point grids + mData2 = 0u; } - __hostdev__ void setBreadthFirstOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::IsBreadthFirst); - } else { - mFlags &= ~static_cast(GridFlags::IsBreadthFirst); - } + // Set and unset various bit flags + __hostdev__ bool isValid() const { return mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ void setMinMaxOn(bool on = true) { mFlags.setMask(GridFlags::HasMinMax, on); } + __hostdev__ void setBBoxOn(bool on = true) { mFlags.setMask(GridFlags::HasBBox, on); } + __hostdev__ void setLongGridNameOn(bool on = true) { mFlags.setMask(GridFlags::HasLongGridName, on); } + __hostdev__ void setAverageOn(bool on = true) { mFlags.setMask(GridFlags::HasAverage, on); } + __hostdev__ void setStdDeviationOn(bool on = true) { mFlags.setMask(GridFlags::HasStdDeviation, on); } + __hostdev__ bool setGridName(const char* src) + { + char *dst = mGridName, *end = dst + MaxNameSize; + while (*src != '\0' && dst < end - 1) + *dst++ = *src++; + while (dst < end) + *dst++ = '\0'; + return *src == '\0'; // returns true if input grid name is NOT longer than MaxNameSize characters } - // Affine transformations based on double precision template __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world @@ -2543,10 +3415,10 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData }; // GridData // Forward declaration of accelerated random access class -template +template class ReadAccessor; -template +template using DefaultReadAccessor = ReadAccessor; /// @brief Highest level of the data structure. 
Contains a tree and a world->index @@ -2554,14 +3426,18 @@ using DefaultReadAccessor = ReadAccessor; /// /// @note This the API of this class to interface with client code template -class Grid : private GridData +class Grid : public GridData { public: - using TreeType = TreeT; - using RootType = typename TreeT::RootType; - using DataType = GridData; + using TreeType = TreeT; + using RootType = typename TreeT::RootType; + using RootNodeType = RootType; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; + using DataType = GridData; using ValueType = typename TreeT::ValueType; - using BuildType = typename TreeT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildType = typename TreeT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool using CoordType = typename TreeT::CoordType; using AccessorType = DefaultReadAccessor; @@ -2592,9 +3468,17 @@ class Grid : private GridData /// @brief @brief Return the total number of values indexed by this IndexGrid /// - /// @note This method is only defined for IndexGrid = NanoGrid - template - __hostdev__ typename enable_if::value, const uint64_t&>::type valueCount() const {return DataType::mData1;} + /// @note This method is only defined for IndexGrid = NanoGrid + template + __hostdev__ typename enable_if::is_index, const uint64_t&>::type + valueCount() const { return DataType::mData1; } + + /// @brief @brief Return the total number of points indexed by this PointGrid + /// + /// @note This method is only defined for PointGrid = NanoGrid + template + __hostdev__ typename enable_if::value, const uint64_t&>::type + pointCount() const { return DataType::mData1; } /// @brief Return a const reference to the tree __hostdev__ const TreeT& tree() const { return 
*reinterpret_cast(this->treePtr()); } @@ -2606,7 +3490,7 @@ class Grid : private GridData __hostdev__ AccessorType getAccessor() const { return AccessorType(this->tree().root()); } /// @brief Return a const reference to the size of a voxel in world units - __hostdev__ const Vec3R& voxelSize() const { return DataType::mVoxelSize; } + __hostdev__ const Vec3d& voxelSize() const { return DataType::mVoxelSize; } /// @brief Return a const reference to the Map for this grid __hostdev__ const Map& map() const { return DataType::mMap; } @@ -2658,7 +3542,7 @@ class Grid : private GridData __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); } /// @brief Computes a AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } + __hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } /// @brief Computes a AABB of active values in index space /// @@ -2670,7 +3554,7 @@ class Grid : private GridData __hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); } /// @brief Methods related to the classification of this grid - __hostdev__ bool isValid() const { return DataType::mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ bool isValid() const { return DataType::isValid(); } __hostdev__ const GridType& gridType() const { return DataType::mGridType; } __hostdev__ const GridClass& gridClass() const { return DataType::mGridClass; } __hostdev__ bool isLevelSet() const { return DataType::mGridClass == GridClass::LevelSet; } @@ -2681,31 +3565,44 @@ class Grid : private GridData __hostdev__ bool isPointData() const { return DataType::mGridClass == GridClass::PointData; } __hostdev__ bool isMask() const { return DataType::mGridClass == GridClass::Topology; } __hostdev__ bool isUnknown() const { return DataType::mGridClass == GridClass::Unknown; } - __hostdev__ bool hasMinMax() const { return DataType::mFlags & static_cast(GridFlags::HasMinMax); } - 
__hostdev__ bool hasBBox() const { return DataType::mFlags & static_cast(GridFlags::HasBBox); } - __hostdev__ bool hasLongGridName() const { return DataType::mFlags & static_cast(GridFlags::HasLongGridName); } - __hostdev__ bool hasAverage() const { return DataType::mFlags & static_cast(GridFlags::HasAverage); } - __hostdev__ bool hasStdDeviation() const { return DataType::mFlags & static_cast(GridFlags::HasStdDeviation); } - __hostdev__ bool isBreadthFirst() const { return DataType::mFlags & static_cast(GridFlags::IsBreadthFirst); } + __hostdev__ bool hasMinMax() const { return DataType::mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return DataType::mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return DataType::mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. - template + template __hostdev__ bool isSequential() const { return NodeT::FIXED_SIZE && this->isBreadthFirst(); } /// @brief return true if the specified node level is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. 
- template - __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + template + __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + + __hostdev__ bool isSequential() const { return UpperNodeType::FIXED_SIZE && LowerNodeType::FIXED_SIZE && LeafNodeType::FIXED_SIZE && this->isBreadthFirst(); } /// @brief Return a c-string with the name of this grid __hostdev__ const char* gridName() const { if (this->hasLongGridName()) { - NANOVDB_ASSERT(DataType::mBlindMetadataCount>0); - const auto &metaData = this->blindMetaData(DataType::mBlindMetadataCount-1);// always the last + NANOVDB_ASSERT(DataType::mBlindMetadataCount > 0); +#if 1// search for first blind meta data that contains a name + for (uint32_t i = 0; i < DataType::mBlindMetadataCount; ++i) { + const auto& metaData = this->blindMetaData(i);// EXTREMELY important to be a reference + if (metaData.mDataClass == GridBlindDataClass::GridName) { + NANOVDB_ASSERT(metaData.mDataType == GridType::Unknown); + return metaData.template getBlindData(); + } + } + NANOVDB_ASSERT(false); // should never hit this! +#else// this assumes that the long grid name is always the last blind meta data + const auto& metaData = this->blindMetaData(DataType::mBlindMetadataCount - 1); // always the last NANOVDB_ASSERT(metaData.mDataClass == GridBlindDataClass::GridName); return metaData.template getBlindData(); +#endif } return DataType::mGridName; } @@ -2722,19 +3619,35 @@ class Grid : private GridData /// @brief Return the count of blind-data encoded in this grid __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; } - /// @brief Return the index of the blind data with specified semantic if found, otherwise -1. + /// @brief Return the index of the first blind data with specified name if found, otherwise -1. 
+ __hostdev__ int findBlindData(const char* name) const; + + /// @brief Return the index of the first blind data with specified semantic if found, otherwise -1. __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const; /// @brief Returns a const pointer to the blindData at the specified linear offset. /// - /// @warning Point might be NULL and the linear offset is assumed to be in the valid range + /// @warning Pointer might be NULL and the linear offset is assumed to be in the valid range + // this method is deprecated !!!! __hostdev__ const void* blindData(uint32_t n) const { - if (DataType::mBlindMetadataCount == 0u) { - return nullptr; - } + printf("\nnanovdb::Grid::blindData is unsafe and hence deprecated! Please use nanovdb::Grid::getBlindData instead.\n\n"); NANOVDB_ASSERT(n < DataType::mBlindMetadataCount); - return this->blindMetaData(n).template getBlindData(); + return this->blindMetaData(n).blindData(); + } + + template + __hostdev__ const BlindDataT* getBlindData(uint32_t n) const + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return this->blindMetaData(n).template getBlindData();// NULL if mismatching BlindDataT + } + + template + __hostdev__ BlindDataT* getBlindData(uint32_t n) + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return const_cast(this->blindMetaData(n).template getBlindData());// NULL if mismatching BlindDataT } __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return *DataType::blindMetaData(n); } @@ -2746,31 +3659,57 @@ class Grid : private GridData template __hostdev__ int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const { - for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i) + for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i) { if (this->blindMetaData(i).mSemantic == semantic) return int(i); + } + return -1; +} + +template +__hostdev__ int Grid::findBlindData(const char* 
name) const +{ + auto test = [&](int n) { + const char* str = this->blindMetaData(n).mName; + for (int i = 0; i < GridBlindMetaData::MaxNameSize; ++i) { + if (name[i] != str[i]) + return false; + if (name[i] == '\0' && str[i] == '\0') + return true; + } + return true; // all len characters matched + }; + for (int i = 0, n = this->blindDataCount(); i < n; ++i) + if (test(i)) + return i; return -1; } // ----------------------------> Tree <-------------------------------------- -template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData -{// sizeof(TreeData<3>) == 64B - static_assert(ROOT_LEVEL == 3, "Root level is assumed to be three"); - uint64_t mNodeOffset[4];//32B, byte offset from this tree to first leaf, lower, upper and root node - uint32_t mNodeCount[3];// 12B, total number of nodes of type: leaf, lower internal, upper internal - uint32_t mTileCount[3];// 12B, total number of active tile values at the lower internal, upper internal and root node levels - uint64_t mVoxelCount;// 8B, total number of active voxels in the root and all its child nodes. +{ // sizeof(TreeData) == 64B + uint64_t mNodeOffset[4]; //32B, byte offset from this tree to first leaf, lower, upper and root node + uint32_t mNodeCount[3]; // 12B, total number of nodes of type: leaf, lower internal, upper internal + uint32_t mTileCount[3]; // 12B, total number of active tile values at the lower internal, upper internal and root node levels + uint64_t mVoxelCount; // 8B, total number of active voxels in the root and all its child nodes. 
// No padding since it's always 32B aligned - template + __hostdev__ TreeData& operator=(const TreeData& other) + { + static_assert(8 * 8 == sizeof(TreeData), "TreeData has unexpected size"); + auto* src = reinterpret_cast(&other); + for (auto *dst = reinterpret_cast(this), *end = dst + 8; dst != end; ++dst) + *dst = *src++; + return *this; + } + template __hostdev__ void setRoot(const RootT* root) { mNodeOffset[3] = PtrDiff(root, this); } - template + template __hostdev__ RootT* getRoot() { return PtrAdd(this, mNodeOffset[3]); } - template + template __hostdev__ const RootT* getRoot() const { return PtrAdd(this, mNodeOffset[3]); } - template + template __hostdev__ void setFirstNode(const NodeT* node) { mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0; @@ -2797,7 +3736,7 @@ struct GridTree /// @brief VDB Tree, which is a thin wrapper around a RootNode. template -class Tree : private TreeData +class Tree : public TreeData { static_assert(RootT::LEVEL == 3, "Tree depth is not supported"); static_assert(RootT::ChildNodeType::LOG2DIM == 5, "Tree configuration is not supported"); @@ -2805,11 +3744,14 @@ class Tree : private TreeData static_assert(RootT::LeafNodeType::LOG2DIM == 3, "Tree configuration is not supported"); public: - using DataType = TreeData; + using DataType = TreeData; using RootType = RootT; - using LeafNodeType = typename RootT::LeafNodeType; + using RootNodeType = RootT; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; using ValueType = typename RootT::ValueType; - using BuildType = typename RootT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildType = typename RootT::BuildType; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool using CoordType = typename RootT::CoordType; using AccessorType = DefaultReadAccessor; @@ -2839,6 +3781,7 @@ class Tree : private TreeData /// @brief Return the value of the given voxel (regardless of state or location in the tree.) __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->root().getValue(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->root().getValue(CoordType(i, j, k)); } /// @brief Return the active state of the given voxel (regardless of state or location in the tree.) __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); } @@ -2868,7 +3811,7 @@ class Tree : private TreeData /// referred to as active voxels (see activeVoxelCount defined above). __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { - NANOVDB_ASSERT(level > 0 && level <= 3);// 1, 2, or 3 + NANOVDB_ASSERT(level > 0 && level <= 3); // 1, 2, or 3 return DataType::mTileCount[level - 1]; } @@ -2888,50 +3831,62 @@ class Tree : private TreeData /// @brief return a pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ NodeT* getFirstNode() { const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset>0 ? PtrAdd(this, offset) : nullptr; + return offset > 0 ? PtrAdd(this, offset) : nullptr; } /// @brief return a const pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ const NodeT* getFirstNode() const { const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset>0 ? PtrAdd(this, offset) : nullptr; + return offset > 0 ? 
PtrAdd(this, offset) : nullptr; } /// @brief return a pointer to the first node at the specified level /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ typename NodeTrait::type* getFirstNode() { - return this->template getFirstNode::type>(); + return this->template getFirstNode::type>(); } /// @brief return a const pointer to the first node of the specified level /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ const typename NodeTrait::type* getFirstNode() const { - return this->template getFirstNode::type>(); + return this->template getFirstNode::type>(); } /// @brief Template specializations of getFirstNode - __hostdev__ LeafNodeType* getFirstLeaf() {return this->getFirstNode();} - __hostdev__ const LeafNodeType* getFirstLeaf() const {return this->getFirstNode();} - __hostdev__ typename NodeTrait::type* getFirstLower() {return this->getFirstNode<1>();} - __hostdev__ const typename NodeTrait::type* getFirstLower() const {return this->getFirstNode<1>();} - __hostdev__ typename NodeTrait::type* getFirstUpper() {return this->getFirstNode<2>();} - __hostdev__ const typename NodeTrait::type* getFirstUpper() const {return this->getFirstNode<2>();} + __hostdev__ LeafNodeType* getFirstLeaf() { return this->getFirstNode(); } + __hostdev__ const LeafNodeType* getFirstLeaf() const { return this->getFirstNode(); } + __hostdev__ typename NodeTrait::type* getFirstLower() { return this->getFirstNode<1>(); } + __hostdev__ const typename NodeTrait::type* getFirstLower() const { return this->getFirstNode<1>(); } + __hostdev__ typename NodeTrait::type* getFirstUpper() { return this->getFirstNode<2>(); } + __hostdev__ const typename NodeTrait::type* getFirstUpper() const { return this->getFirstNode<2>(); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { + return this->root().template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) + { + return this->root().template set(ijk, args...); + } private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(TreeData) is misaligned"); @@ -2945,7 +3900,7 @@ __hostdev__ void Tree::extrema(ValueType& min, ValueType& max) const max = this->root().maximum(); } -// --------------------------> RootNode <------------------------------------ +// --------------------------> RootData <------------------------------------ /// @brief Struct with all the member data of the RootNode (useful during serialization of an openvdb RootNode) /// @@ -2954,15 +3909,15 @@ template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData { using ValueT = typename ChildT::ValueType; - using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool using CoordT = typename ChildT::CoordType; using StatsT = typename ChildT::FloatType; static constexpr bool FIXED_SIZE = false; /// @brief Return a key based on the coordinates of a voxel -#ifdef USE_SINGLE_ROOT_KEY +#ifdef NANOVDB_USE_SINGLE_ROOT_KEY using KeyT = uint64_t; - template + template __hostdev__ static KeyT CoordToKey(const CoordType& ijk) { static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof"); @@ -2973,10 +3928,10 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData } __hostdev__ static CoordT KeyToCoord(const KeyT& key) { - static constexpr uint64_t MASK = (1u << 21) - 1; - return CoordT(((key >> 42) & MASK) << ChildT::TOTAL, - ((key >> 21) & MASK) << ChildT::TOTAL, - (key & MASK) << ChildT::TOTAL); + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return CoordT(((key >> 42) & MASK) << ChildT::TOTAL, // x are the upper 21 bits + ((key >> 21) & MASK) << ChildT::TOTAL, // y are the middle 21 bits + (key & MASK) << ChildT::TOTAL); // z are the lower 21 bits } #else using KeyT = CoordT; @@ -2995,31 +3950,33 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment /// /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
- __hostdev__ static constexpr uint32_t padding() { - return sizeof(RootData) - (24 + 4 + 3*sizeof(ValueT) + 2*sizeof(StatsT)); + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(RootData) - (24 + 4 + 3 * sizeof(ValueT) + 2 * sizeof(StatsT)); } struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) Tile { - template - __hostdev__ void setChild(const CoordType& k, const ChildT *ptr, const RootData *data) + template + __hostdev__ void setChild(const CoordType& k, const void* ptr, const RootData* data) { key = CoordToKey(k); + state = false; child = PtrDiff(ptr, data); } - template - __hostdev__ void setValue(const CoordType& k, bool s, const ValueType &v) + template + __hostdev__ void setValue(const CoordType& k, bool s, const ValueType& v) { key = CoordToKey(k); state = s; value = v; child = 0; } - __hostdev__ bool isChild() const { return child!=0; } - __hostdev__ bool isValue() const { return child==0; } - __hostdev__ bool isActive() const { return child==0 && state; } + __hostdev__ bool isChild() const { return child != 0; } + __hostdev__ bool isValue() const { return child == 0; } + __hostdev__ bool isActive() const { return child == 0 && state; } __hostdev__ CoordT origin() const { return KeyToCoord(key); } - KeyT key; // USE_SINGLE_ROOT_KEY ? 8B : 12B + KeyT key; // NANOVDB_USE_SINGLE_ROOT_KEY ? 8B : 12B int64_t child; // 8B. signed byte offset from this node to the child node. 0 means it is a constant tile, so use value. uint32_t state; // 4B. state of tile value ValueT value; // value of tile (i.e. 
no child node) @@ -3039,6 +3996,36 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData return reinterpret_cast(this + 1) + n; } + __hostdev__ Tile* probeTile(const CoordT& ijk) + { +#if 1 // switch between linear and binary seach + const auto key = CoordToKey(ijk); + for (Tile *p = reinterpret_cast(this + 1), *q = p + mTableSize; p < q; ++p) + if (p->key == key) + return p; + return nullptr; +#else // do not enable binary search if tiles are not guaranteed to be sorted!!!!!! + int32_t low = 0, high = mTableSize; // low is inclusive and high is exclusive + while (low != high) { + int mid = low + ((high - low) >> 1); + const Tile* tile = &tiles[mid]; + if (tile->key == key) { + return tile; + } else if (tile->key < key) { + low = mid + 1; + } else { + high = mid; + } + } + return nullptr; +#endif + } + + __hostdev__ inline const Tile* probeTile(const CoordT& ijk) const + { + return const_cast(this)->probeTile(ijk); + } + /// @brief Returns a const reference to the child node in the specified tile. /// /// @warning A child node is assumed to exist in the specified tile @@ -3053,9 +4040,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData return PtrAdd(this, tile->child); } - __hostdev__ const ValueT& getMin() const { return mMinimum; } - __hostdev__ const ValueT& getMax() const { return mMaximum; } - __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } @@ -3070,119 +4057,260 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData ~RootData() = delete; }; // RootData +// --------------------------> RootNode <------------------------------------ + /// @brief Top-most node of the VDB tree structure. 
template -class RootNode : private RootData +class RootNode : public RootData { public: using DataType = RootData; - using LeafNodeType = typename ChildT::LeafNodeType; using ChildNodeType = ChildT; - using RootType = RootNode;// this allows RootNode to behave like a Tree - + using RootType = RootNode; // this allows RootNode to behave like a Tree + using RootNodeType = RootType; + using UpperNodeType = ChildT; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename ChildT::LeafNodeType; using ValueType = typename DataType::ValueT; using FloatType = typename DataType::StatsT; - using BuildType = typename DataType::BuildT;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildType = typename DataType::BuildT; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool using CoordType = typename ChildT::CoordType; - using BBoxType = BBox; + using BBoxType = BBox; using AccessorType = DefaultReadAccessor; using Tile = typename DataType::Tile; static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf - class ChildIterator - { - const DataType *mParent; - uint32_t mPos, mSize; + template + class BaseIter + { + protected: + using DataT = typename match_const::type; + using TileT = typename match_const::type; + DataT* mData; + uint32_t mPos, mSize; + __hostdev__ BaseIter(DataT* data = nullptr, uint32_t n = 0) + : mData(data) + , mPos(0) + , mSize(n) + { + } + public: - __hostdev__ ChildIterator() : mParent(nullptr), mPos(0), mSize(0) {} - __hostdev__ ChildIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()) { - NANOVDB_ASSERT(mParent); - while (mPostile(mPos)->isChild()) ++mPos; + __hostdev__ operator bool() const { return mPos < mSize; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ void next() { ++mPos; } + 
__hostdev__ TileT* tile() const { return mData->tile(mPos); } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); } - ChildIterator& operator=(const ChildIterator&) = default; - __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(mParent->tile(mPos));} - __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(mParent->tile(mPos));} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();} - __hostdev__ operator bool() const {return mPos < mSize;} - __hostdev__ uint32_t pos() const {return mPos;} - __hostdev__ ChildIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mPos; - while (mPos < mSize && mParent->tile(mPos)->isValue()) ++mPos; - return *this; + __hostdev__ CoordType getCoord() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); } - __hostdev__ ChildIterator operator++(int) { + }; // Member class BaseIter + + template + class ChildIter : public BaseIter + { + using BaseT = BaseIter; + using NodeT = typename match_const::type; + + public: + __hostdev__ ChildIter() + : BaseT() + { + } + __hostdev__ ChildIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isChild()) + this->next(); + } + __hostdev__ NodeT& operator*() const + { + NANOVDB_ASSERT(*this); + return *BaseT::mData->getChild(this->tile()); + } + __hostdev__ NodeT* operator->() const + { + NANOVDB_ASSERT(*this); + return BaseT::mData->getChild(this->tile()); + } + __hostdev__ ChildIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isValue()) + this->next(); + return *this; + } + __hostdev__ ChildIter operator++(int) + { auto tmp = *this; ++(*this); return tmp; } - }; // Member class ChildIterator + }; // Member class ChildIter - ChildIterator beginChild() const {return 
ChildIterator(this);} + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; - class ValueIterator + ChildIterator beginChild() { return ChildIterator(this); } + ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + + template + class ValueIter : public BaseIter { - const DataType *mParent; - uint32_t mPos, mSize; + using BaseT = BaseIter; + public: - __hostdev__ ValueIterator() : mParent(nullptr), mPos(0), mSize(0) {} - __hostdev__ ValueIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){ - NANOVDB_ASSERT(mParent); - while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos; + __hostdev__ ValueIter() + : BaseT() + { } - ValueIterator& operator=(const ValueIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;} - __hostdev__ bool isActive() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->state;} - __hostdev__ operator bool() const {return mPos < mSize;} - __hostdev__ uint32_t pos() const {return mPos;} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();} - __hostdev__ ValueIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mPos; - while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos; + __hostdev__ ValueIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && this->tile()->isChild()) + this->next(); + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ ValueIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isChild()) + this->next(); return *this; } - __hostdev__ ValueIterator operator++(int) { + __hostdev__ ValueIter operator++(int) + { auto tmp = *this; 
++(*this); return tmp; } - }; // Member class ValueIterator + }; // Member class ValueIter - ValueIterator beginValue() const {return ValueIterator(this);} + using ValueIterator = ValueIter; + using ConstValueIterator = ValueIter; - class ValueOnIterator + ValueIterator beginValue() { return ValueIterator(this); } + ConstValueIterator cbeginValueAll() const { return ConstValueIterator(this); } + + template + class ValueOnIter : public BaseIter { - const DataType *mParent; - uint32_t mPos, mSize; + using BaseT = BaseIter; + public: - __hostdev__ ValueOnIterator() : mParent(nullptr), mPos(0), mSize(0) {} - __hostdev__ ValueOnIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){ - NANOVDB_ASSERT(mParent); - while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos; + __hostdev__ ValueOnIter() + : BaseT() + { } - ValueOnIterator& operator=(const ValueOnIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;} - __hostdev__ operator bool() const {return mPos < mSize;} - __hostdev__ uint32_t pos() const {return mPos;} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();} - __hostdev__ ValueOnIterator& operator++() { - NANOVDB_ASSERT(mParent); - ++mPos; - while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos; + __hostdev__ ValueOnIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isActive()) + ++BaseT::mPos; + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ ValueOnIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && !this->tile()->isActive()) + this->next(); return *this; } - __hostdev__ ValueOnIterator operator++(int) { + __hostdev__ ValueOnIter operator++(int) + { auto tmp = *this; ++(*this); return tmp; } - }; 
// Member class ValueOnIterator + }; // Member class ValueOnIter + + using ValueOnIterator = ValueOnIter; + using ConstValueOnIterator = ValueOnIter; - ValueOnIterator beginValueOn() const {return ValueOnIterator(this);} + ValueOnIterator beginValueOn() { return ValueOnIterator(this); } + ConstValueOnIterator cbeginValueOn() const { return ConstValueOnIterator(this); } + + template + class DenseIter : public BaseIter + { + using BaseT = BaseIter; + using NodeT = typename match_const::type; + + public: + __hostdev__ DenseIter() + : BaseT() + { + } + __hostdev__ DenseIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + } + __hostdev__ NodeT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(*this); + NodeT* child = nullptr; + auto* t = this->tile(); + if (t->isChild()) { + child = BaseT::mData->getChild(t); + } else { + value = t->value; + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ DenseIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + return *this; + } + __hostdev__ DenseIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class DenseIter + + using DenseIterator = DenseIter; + using ConstDenseIterator = DenseIter; + + DenseIterator beginDense() { return DenseIterator(this); } + ConstDenseIterator cbeginDense() const { return ConstDenseIterator(this); } + ConstDenseIterator cbeginChildAll() const { return ConstDenseIterator(this); } /// @brief This class cannot be constructed or deleted RootNode() = delete; @@ -3207,18 +4335,19 @@ class RootNode : private RootData /// @brief Return the number of tiles encoded in this root node __hostdev__ const uint32_t& tileCount() const { return DataType::mTableSize; } + __hostdev__ const uint32_t& getTableSize() const { return DataType::mTableSize; } /// @brief Return a const reference to the minimum active value encoded 
in this root node and any of its child nodes - __hostdev__ const ValueType& minimum() const { return this->getMin(); } + __hostdev__ const ValueType& minimum() const { return DataType::mMinimum; } /// @brief Return a const reference to the maximum active value encoded in this root node and any of its child nodes - __hostdev__ const ValueType& maximum() const { return this->getMax(); } + __hostdev__ const ValueType& maximum() const { return DataType::mMaximum; } /// @brief Return a const reference to the average of all the active values encoded in this root node and any of its child nodes __hostdev__ const FloatType& average() const { return DataType::mAverage; } /// @brief Return the variance of all the active values encoded in this root node and any of its child nodes - __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; } + __hostdev__ FloatType variance() const { return Pow2(DataType::mStdDevi); } /// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } @@ -3229,31 +4358,42 @@ class RootNode : private RootData /// @brief Return the actual memory footprint of this root node __hostdev__ uint64_t memUsage() const { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); } + /// @brief Return true if this RootNode is empty, i.e. 
contains no values or nodes + __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel __hostdev__ ValueType getValue(const CoordType& ijk) const { - if (const Tile* tile = this->probeTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value; } return DataType::mBackground; } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->getValue(CoordType(i, j, k)); } __hostdev__ bool isActive(const CoordType& ijk) const { - if (const Tile* tile = this->probeTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { return tile->isChild() ? this->getChild(tile)->isActive(ijk) : tile->state; } return false; } - /// @brief Return true if this RootNode is empty, i.e. 
contains no values or nodes - __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); } - __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { - if (const Tile* tile = this->probeTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); return child->probeValue(ijk, v); } v = tile->value; @@ -3265,48 +4405,50 @@ class RootNode : private RootData __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { - const Tile* tile = this->probeTile(ijk); + const Tile* tile = DataType::probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); return child->probeLeaf(ijk); } return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const { - const Tile* tile = this->probeTile(ijk); - if (tile && tile->isChild()) { - return this->getChild(tile); - } - return nullptr; + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; + } + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; } - /// @brief Find and return a Tile of this root node - __hostdev__ const Tile* probeTile(const CoordType& ijk) const + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const { - const Tile* tiles = reinterpret_cast(this + 1); - const auto key = DataType::CoordToKey(ijk); -#if 1 // switch between linear and binary seach - for (uint32_t i = 0; i < DataType::mTableSize; ++i) { - if (tiles[i].key == key) return &tiles[i]; + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template get(ijk, args...); + return OpT::get(*tile, args...); } -#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!! - // binary-search of pre-sorted elements - int32_t low = 0, high = DataType::mTableSize; // low is inclusive and high is exclusive - while (low != high) { - int mid = low + ((high - low) >> 1); - const Tile* tile = &tiles[mid]; - if (tile->key == key) { - return tile; - } else if (tile->key < key) { - low = mid + 1; - } else { - high = mid; - } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occationally fails with NVCC + __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) + set(const CoordType& ijk, ArgsT&&... 
args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template set(ijk, args...); + return OpT::set(*tile, args...); } -#endif - return nullptr; + return OpT::set(*this, args...); } private: @@ -3318,7 +4460,7 @@ class RootNode : private RootData template friend class Tree; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private method to return node information and update a ReadAccessor template __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const @@ -3326,15 +4468,13 @@ class RootNode : private RootData using NodeInfoT = typename AccT::NodeInfo; if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getNodeInfoAndCache(ijk, acc); } - return NodeInfoT{LEVEL, ChildT::dim(), tile->value, tile->value, tile->value, - 0, tile->origin(), tile->origin() + CoordType(ChildT::DIM)}; + return NodeInfoT{LEVEL, ChildT::dim(), tile->value, tile->value, tile->value, 0, tile->origin(), tile->origin() + CoordType(ChildT::DIM)}; } - return NodeInfoT{LEVEL, ChildT::dim(), this->minimum(), this->maximum(), - this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + return NodeInfoT{LEVEL, ChildT::dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; } /// @brief Private method to return a voxel value and update a ReadAccessor @@ -3343,7 +4483,7 @@ class RootNode : private RootData { if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getValueAndCache(ijk, acc); } @@ -3357,7 +4497,7 @@ class RootNode : private RootData { const Tile* tile = this->probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = 
this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->isActiveAndCache(ijk, acc); } @@ -3369,7 +4509,7 @@ class RootNode : private RootData { if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->probeValueAndCache(ijk, v, acc); } @@ -3385,19 +4525,20 @@ class RootNode : private RootData { const Tile* tile = this->probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->probeLeafAndCache(ijk, acc); } return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const { if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getDimAndCache(ijk, ray, acc); } @@ -3406,6 +4547,38 @@ class RootNode : private RootData return ChildNodeType::dim(); // background } + template + //__hostdev__ decltype(OpT::get(std::declval(), std::declval()...)) + __hostdev__ auto + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occationally fails with NVCC + __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... 
args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) { + ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + return OpT::set(*tile, args...); + } + return OpT::set(*this, args...); + } + }; // RootNode class // After the RootNode the memory layout is assumed to be the sorted Tiles @@ -3419,16 +4592,16 @@ template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData { using ValueT = typename ChildT::ValueType; - using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool using StatsT = typename ChildT::FloatType; using CoordT = typename ChildT::CoordType; - using MaskT = typename ChildT::template MaskType; + using MaskT = typename ChildT::template MaskType; static constexpr bool FIXED_SIZE = true; union Tile { ValueT value; - int64_t child;//signed 64 bit byte offset relative to the InternalData!! + int64_t child; //signed 64 bit byte offset relative to this InternalData, i.e. child-pointer = Tile::child + this /// @brief This class cannot be constructed or deleted Tile() = delete; Tile(const Tile&) = delete; @@ -3450,22 +4623,22 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment /// /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. - __hostdev__ static constexpr uint32_t padding() { - return sizeof(InternalData) - (24u + 8u + 2*(sizeof(MaskT) + sizeof(ValueT) + sizeof(StatsT)) - + (1u << (3 * LOG2DIM))*(sizeof(ValueT) > 8u ? 
sizeof(ValueT) : 8u)); + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(InternalData) - (24u + 8u + 2 * (sizeof(MaskT) + sizeof(ValueT) + sizeof(StatsT)) + (1u << (3 * LOG2DIM)) * (sizeof(ValueT) > 8u ? sizeof(ValueT) : 8u)); } alignas(32) Tile mTable[1u << (3 * LOG2DIM)]; // sizeof(ValueT) x (16*16*16 or 32*32*32) __hostdev__ static uint64_t memUsage() { return sizeof(InternalData); } - __hostdev__ void setChild(uint32_t n, const void *ptr) + __hostdev__ void setChild(uint32_t n, const void* ptr) { NANOVDB_ASSERT(mChildMask.isOn(n)); mTable[n].child = PtrDiff(ptr, this); } - template - __hostdev__ void setValue(uint32_t n, const ValueT &v) + template + __hostdev__ void setValue(uint32_t n, const ValueT& v) { NANOVDB_ASSERT(!mChildMask.isOn(n)); mTable[n].value = v; @@ -3485,24 +4658,24 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ ValueT getValue(uint32_t n) const { - NANOVDB_ASSERT(!mChildMask.isOn(n)); + NANOVDB_ASSERT(mChildMask.isOff(n)); return mTable[n].value; } __hostdev__ bool isActive(uint32_t n) const { - NANOVDB_ASSERT(!mChildMask.isOn(n)); + NANOVDB_ASSERT(mChildMask.isOff(n)); return mValueMask.isOn(n); } - __hostdev__ bool isChild(uint32_t n) const {return mChildMask.isOn(n);} + __hostdev__ bool isChild(uint32_t n) const { return mChildMask.isOn(n); } - template + template __hostdev__ void setOrigin(const T& ijk) { mBBox[0] = ijk; } - __hostdev__ const ValueT& getMin() const { return mMinimum; } - __hostdev__ const ValueT& getMax() const { return mMaximum; } - __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } @@ -3519,7 +4692,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) 
InternalData /// @brief Internal nodes of a VDB treedim(), template -class InternalNode : private InternalData +class InternalNode : public InternalData { public: using DataType = InternalData; @@ -3547,48 +4720,153 @@ class InternalNode : private InternalData class ChildIterator : public MaskIterT { using BaseT = MaskIterT; - const DataType *mParent; + const DataType* mParent; + public: - __hostdev__ ChildIterator() : BaseT(), mParent(nullptr) {} - __hostdev__ ChildIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOn()), mParent(parent->data()) {} - ChildIterator& operator=(const ChildIterator&) = default; - __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(BaseT::pos());} - __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(BaseT::pos());} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return (*this)->origin();} + __hostdev__ ChildIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ChildIterator(const InternalNode* parent) + : BaseT(parent->data()->mChildMask.beginOn()) + , mParent(parent->data()) + { + } + ChildIterator& operator=(const ChildIterator&) = default; + __hostdev__ const ChildT& operator*() const + { + NANOVDB_ASSERT(*this); + return *mParent->getChild(BaseT::pos()); + } + __hostdev__ const ChildT* operator->() const + { + NANOVDB_ASSERT(*this); + return mParent->getChild(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return (*this)->origin(); + } }; // Member class ChildIterator - ChildIterator beginChild() const {return ChildIterator(this);} + ChildIterator beginChild() const { return ChildIterator(this); } /// @brief Visits all tile values in this node, i.e. 
both inactive and active tiles class ValueIterator : public MaskIterT { using BaseT = MaskIterT; - const InternalNode *mParent; + const InternalNode* mParent; + public: - __hostdev__ ValueIterator() : BaseT(), mParent(nullptr) {} - __hostdev__ ValueIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOff()), mParent(parent) {} - ValueIterator& operator=(const ValueIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());} - __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->data()->isActive(BaseT::mPos);} + __hostdev__ ValueIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueIterator(const InternalNode* parent) + : BaseT(parent->data()->mChildMask.beginOff()) + , mParent(parent) + { + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->localToGlobalCoord(BaseT::pos()); + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->isActive(BaseT::mPos); + } }; // Member class ValueIterator - ValueIterator beginValue() const {return ValueIterator(this);} + ValueIterator beginValue() const { return ValueIterator(this); } + ValueIterator cbeginValueAll() const { return ValueIterator(this); } /// @brief Visits active tile values of this node only class ValueOnIterator : public MaskIterT { using BaseT = MaskIterT; - const InternalNode *mParent; + const InternalNode* mParent; + public: - __hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {} - __hostdev__ ValueOnIterator(const InternalNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), 
mParent(parent) {} - ValueOnIterator& operator=(const ValueOnIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());} - __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());} + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const InternalNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->localToGlobalCoord(BaseT::pos()); + } }; // Member class ValueOnIterator - ValueOnIterator beginValueOn() const {return ValueOnIterator(this);} + ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public Mask::DenseIterator + { + using BaseT = typename Mask::DenseIterator; + const DataType* mParent; + + public: + __hostdev__ DenseIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ DenseIterator(const InternalNode* parent) + : BaseT(0) + , mParent(parent->data()) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ const ChildT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + const ChildT* child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->getChild(BaseT::pos()); + } else { + value = mParent->getValue(BaseT::pos()); + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->isActive(BaseT::pos()); + } + __hostdev__ CoordType 
getOrigin() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->localToGlobalCoord(BaseT::pos()); + } + }; // Member class DenseIterator + + DenseIterator beginDense() const { return DenseIterator(this); } + DenseIterator cbeginChildAll() const { return DenseIterator(this); } // matches openvdb /// @brief This class cannot be constructed or deleted InternalNode() = delete; @@ -3608,9 +4886,11 @@ class InternalNode : private InternalData /// @brief Return a const reference to the bit mask of active voxels in this internal node __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } /// @brief Return a const reference to the bit mask of child nodes in this internal node __hostdev__ const MaskType& childMask() const { return DataType::mChildMask; } + __hostdev__ const MaskType& getChildMask() const { return DataType::mChildMask; } /// @brief Return the origin in index space of this leaf node __hostdev__ CoordType origin() const { return DataType::mBBox.min() & ~MASK; } @@ -3625,7 +4905,7 @@ class InternalNode : private InternalData __hostdev__ const FloatType& average() const { return DataType::mAverage; } /// @brief Return the variance of all the active values encoded in this internal node and any of its child nodes - __hostdev__ FloatType variance() const { return DataType::mStdDevi*DataType::mStdDevi; } + __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; } /// @brief Return a const reference to the standard deviation of all the active values encoded in this internal node and any of its child nodes __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } @@ -3633,20 +4913,38 @@ class InternalNode : private InternalData /// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes __hostdev__ const BBox& 
bbox() const { return DataType::mBBox; } + /// @brief If the first entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getFirstValue() on the child. + __hostdev__ ValueType getFirstValue() const + { + return DataType::mChildMask.isOn(0) ? this->getChild(0)->getFirstValue() : DataType::getValue(0); + } + + /// @brief If the last entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getLastValue() on the child. + __hostdev__ ValueType getLastValue() const + { + return DataType::mChildMask.isOn(SIZE - 1) ? this->getChild(SIZE - 1)->getLastValue() : DataType::getValue(SIZE - 1); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::getValue(n); } - __hostdev__ bool isActive(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); return DataType::mChildMask.isOn(n) ? 
this->getChild(n)->isActive(ijk) : DataType::isActive(n); } - - /// @brief return the state and updates the value of the specified voxel __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { const uint32_t n = CoordToOffset(ijk); @@ -3655,7 +4953,6 @@ class InternalNode : private InternalData v = DataType::getValue(n); return DataType::isActive(n); } - __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); @@ -3664,6 +4961,13 @@ class InternalNode : private InternalData return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr; + } __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); @@ -3673,15 +4977,9 @@ class InternalNode : private InternalData /// @brief Return the linear offset corresponding to the given coordinate __hostdev__ static uint32_t CoordToOffset(const CoordType& ijk) { -#if 0 - return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) + - (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) + - ((ijk[2] & MASK) >> ChildT::TOTAL); -#else - return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) | + return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) | // note, we're using bitwise OR instead of + (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) | - ((ijk[2] & MASK) >> ChildT::TOTAL); -#endif + ((ijk[2] & MASK) >> ChildT::TOTAL); } /// @return the local coordinate of the n'th tile or child node @@ -3707,14 +5005,30 @@ class InternalNode : private InternalData } /// @brief Return true if this node or any of its child nodes contain active values - __hostdev__ bool isActive() const + __hostdev__ bool isActive() const { return DataType::mFlags & uint32_t(2); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const { - return DataType::mFlags & uint32_t(2); + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + //__hostdev__ auto // occationally fails with NVCC + __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + set(const CoordType& ijk, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template set(ijk, args...); + return OpT::set(*this, n, args...); } private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned"); - //static_assert(offsetof(DataType, mTable) % 32 == 0, "InternalData::mTable is misaligned"); template friend class ReadAccessor; @@ -3724,48 +5038,33 @@ class InternalNode : private InternalData template friend class InternalNode; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private read access method used by the ReadAccessor template __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) + if (DataType::mChildMask.isOff(n)) return DataType::getValue(n); const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->getValueAndCache(ijk, acc); } - - template - __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const - { - using NodeInfoT = typename AccT::NodeInfo; - const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) { - return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), - this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; - } - const ChildT* child = this->getChild(n); - acc.insert(ijk, child); - return child->getNodeInfoAndCache(ijk, acc); - } - template __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = 
CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) + if (DataType::mChildMask.isOff(n)) return DataType::isActive(n); const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->isActiveAndCache(ijk, acc); } - template __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) { + if (DataType::mChildMask.isOff(n)) { v = DataType::getValue(n); return DataType::isActive(n); } @@ -3773,22 +5072,35 @@ class InternalNode : private InternalData acc.insert(ijk, child); return child->probeValueAndCache(ijk, v, acc); } - template __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) + if (DataType::mChildMask.isOff(n)) return nullptr; const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->probeLeafAndCache(ijk, acc); } + template + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const + { + using NodeInfoT = typename AccT::NodeInfo; + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) { + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + } + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->getNodeInfoAndCache(ijk, acc); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const { - if (DataType::mFlags & uint32_t(1u)) return this->dim(); // skip this node if the 1st bit is set + if (DataType::mFlags & uint32_t(1u)) + return this->dim(); // skip this node if the 1st bit is set //if (!ray.intersects( this->bbox() )) return 1< return ChildNodeType::dim(); // tile value } + template + __hostdev__ auto + 
//__hostdev__ decltype(OpT::get(std::declval(), std::declval(), std::declval()...)) + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::get(*this, n, args...); + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + + template + //__hostdev__ auto // occationally fails with NVCC + __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::set(*this, n, args...); + ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + }; // InternalNode class -// --------------------------> LeafNode <------------------------------------ +// --------------------------> LeafData <------------------------------------ /// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) /// @@ -3815,12 +5153,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData using ValueType = ValueT; using BuildType = ValueT; using FloatType = typename FloatTraits::FloatType; - using ArrayType = ValueT;// type used for the internal mValue array + using ArrayType = ValueT; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: is sparse ValueIndex, bits5,6,7: bit-width for FpN + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. 
ValueType mMinimum; // typically 4B @@ -3832,21 +5170,20 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment /// /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. - __hostdev__ static constexpr uint32_t padding() { - return sizeof(LeafData) - (12 + 3 + 1 + sizeof(MaskT) - + 2*(sizeof(ValueT) + sizeof(FloatType)) - + (1u << (3 * LOG2DIM))*sizeof(ValueT)); + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12 + 3 + 1 + sizeof(MaskT) + 2 * (sizeof(ValueT) + sizeof(FloatType)) + (1u << (3 * LOG2DIM)) * sizeof(ValueT)); } __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } - //__hostdev__ const ValueType* values() const { return mValues; } __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; } - __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } - __hostdev__ void setValue(uint32_t offset, const ValueType& value) + __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, const ValueType& value) { mValueMask.setOn(offset); mValues[offset] = value; } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } __hostdev__ ValueType getMin() const { return mMinimum; } __hostdev__ ValueType getMax() const { return mMaximum; } @@ -3858,9 +5195,15 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData __hostdev__ void setAvg(const FloatType& v) { mAverage = v; } __hostdev__ void setDev(const FloatType& v) { mStdDevi = v; } - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + __hostdev__ void fill(const ValueType& v) + { + for (auto *p = mValues, *q = p + 512; p != q; ++p) + *p = v; + } + /// @brief This class cannot be constructed or deleted LeafData() = delete; LeafData(const 
LeafData&) = delete; @@ -3868,6 +5211,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData ~LeafData() = delete; }; // LeafData +// --------------------------> LeafFnBase <------------------------------------ + /// @brief Base-class for quantized float leaf nodes template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase @@ -3879,55 +5224,60 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: is sparse ValueIndex, bits5,6,7: bit-width for FpN + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. - float mMinimum; // 4B - minimum of ALL values in this node - float mQuantum; // = (max - min)/15 4B - uint16_t mMin, mMax, mAvg, mDev;// quantized representations of statistics of active values + float mMinimum; // 4B - minimum of ALL values in this node + float mQuantum; // = (max - min)/15 4B + uint16_t mMin, mMax, mAvg, mDev; // quantized representations of statistics of active values // no padding since it's always 32B aligned __hostdev__ static uint64_t memUsage() { return sizeof(LeafFnBase); } /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment /// /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
- __hostdev__ static constexpr uint32_t padding() { - return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT) + 2*4 + 4*2); + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT) + 2 * 4 + 4 * 2); } __hostdev__ void init(float min, float max, uint8_t bitWidth) { mMinimum = min; - mQuantum = (max - min)/float((1 << bitWidth)-1); + mQuantum = (max - min) / float((1 << bitWidth) - 1); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + /// @brief return the quantized minimum of the active values in this node - __hostdev__ float getMin() const { return mMin*mQuantum + mMinimum; } + __hostdev__ float getMin() const { return mMin * mQuantum + mMinimum; } /// @brief return the quantized maximum of the active values in this node - __hostdev__ float getMax() const { return mMax*mQuantum + mMinimum; } + __hostdev__ float getMax() const { return mMax * mQuantum + mMinimum; } /// @brief return the quantized average of the active values in this node - __hostdev__ float getAvg() const { return mAvg*mQuantum + mMinimum; } + __hostdev__ float getAvg() const { return mAvg * mQuantum + mMinimum; } /// @brief return the quantized standard deviation of the active values in this node /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 - __hostdev__ float getDev() const { return mDev*mQuantum; } + __hostdev__ float getDev() const { return mDev * mQuantum; } /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 - __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum) / mQuantum + 0.5f); } /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 - __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum) / mQuantum + 0.5f); } /// @note min <= avg <= max or 0 <= (avg-min)/(min-max) <= 1 - 
__hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum) / mQuantum + 0.5f); } /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 - __hostdev__ void setDev(float dev) { mDev = uint16_t(dev/mQuantum + 0.5f); } + __hostdev__ void setDev(float dev) { mDev = uint16_t(dev / mQuantum + 0.5f); } - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } -};// LeafFnBase +}; // LeafFnBase + +// --------------------------> LeafData <------------------------------------ /// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) /// @@ -3938,24 +5288,25 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData; using BuildType = Fp4; - using ArrayType = uint8_t;// type used for the internal mValue array + using ArrayType = uint8_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; - alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)];// LeafFnBase is 32B aligned and so is mCode + alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)]; // LeafFnBase is 32B aligned and so is mCode __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } - __hostdev__ static constexpr uint32_t padding() { - static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase"); + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); return sizeof(LeafData) - sizeof(BaseT) - (1u << (3 * LOG2DIM - 1)); } __hostdev__ static constexpr uint8_t bitWidth() { return 4u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { #if 0 const uint8_t c = mCode[i>>1]; return ( (i&1) ? 
c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum; #else - return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BaseT::mQuantum + BaseT::mMinimum; + return ((mCode[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)) * BaseT::mQuantum + BaseT::mMinimum; #endif } @@ -3966,25 +5317,28 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafBase <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase { using BaseT = LeafFnBase; using BuildType = Fp8; - using ArrayType = uint8_t;// type used for the internal mValue array + using ArrayType = uint8_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; alignas(32) uint8_t mCode[1u << 3 * LOG2DIM]; - __hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); } - __hostdev__ static constexpr uint32_t padding() { - static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase"); + __hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); return sizeof(LeafData) - sizeof(BaseT) - (1u << 3 * LOG2DIM); } __hostdev__ static constexpr uint8_t bitWidth() { return 8u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { - return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/255 + min + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/255 + min } /// @brief This class cannot be constructed or deleted LeafData() = delete; @@ -3993,26 +5347,29 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase { using BaseT = 
LeafFnBase; using BuildType = Fp16; - using ArrayType = uint16_t;// type used for the internal mValue array + using ArrayType = uint16_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; alignas(32) uint16_t mCode[1u << 3 * LOG2DIM]; __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } - __hostdev__ static constexpr uint32_t padding() { - static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase"); - return sizeof(LeafData) - sizeof(BaseT) - 2*(1u << 3 * LOG2DIM); + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - 2 * (1u << 3 * LOG2DIM); } __hostdev__ static constexpr uint8_t bitWidth() { return 16u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { - return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/65535 + min + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/65535 + min } /// @brief This class cannot be constructed or deleted @@ -4022,59 +5379,61 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase -{// this class has no data members, however every instance is immediately followed -// bitWidth*64 bytes. Since its base class is 32B aligned so are the bitWidth*64 bytes +{ // this class has no additional data members, however every instance is immediately followed by + // bitWidth*64 bytes. 
Since its base class is 32B aligned so are the bitWidth*64 bytes using BaseT = LeafFnBase; using BuildType = FpN; - static constexpr bool FIXED_SIZE = false; - __hostdev__ static constexpr uint32_t padding() { - static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase"); + static constexpr bool FIXED_SIZE = false; + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); return 0; } - __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5) - __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth()*64; } - __hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth*64; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); } // 4,8,16,32 = 2^(2,3,4,5) + __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth() * 64; } + __hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth * 64; } + __hostdev__ float getValue(uint32_t i) const { -#ifdef NANOVDB_FPN_BRANCHLESS// faster - const int b = BaseT::mFlags >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits -#if 0// use LUT +#ifdef NANOVDB_FPN_BRANCHLESS // faster + const int b = BaseT::mFlags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits +#if 0 // use LUT uint16_t code = reinterpret_cast(this + 1)[i >> (4 - b)]; const static uint8_t shift[5] = {15, 7, 3, 1, 0}; const static uint16_t mask[5] = {1, 3, 15, 255, 65535}; code >>= (i & shift[b]) << b; code &= mask[b]; -#else// no LUT +#else // no LUT uint32_t code = reinterpret_cast(this + 1)[i >> (5 - b)]; - //code >>= (i & ((16 >> b) - 1)) << b; code >>= (i & ((32 >> b) - 1)) << b; - code &= (1 << (1 << b)) - 1; + code &= (1 << (1 << b)) - 1; #endif -#else// use branched version (slow) +#else // use branched version (slow) float code; - auto *values = 
reinterpret_cast(this+1); + auto* values = reinterpret_cast(this + 1); switch (BaseT::mFlags >> 5) { - case 0u:// 1 bit float - code = float((values[i>>3] >> (i&7) ) & uint8_t(1)); - break; - case 1u:// 2 bits float - code = float((values[i>>2] >> ((i&3)<<1)) & uint8_t(3)); - break; - case 2u:// 4 bits float - code = float((values[i>>1] >> ((i&1)<<2)) & uint8_t(15)); - break; - case 3u:// 8 bits float - code = float(values[i]); - break; - default:// 16 bits float - code = float(reinterpret_cast(values)[i]); + case 0u: // 1 bit float + code = float((values[i >> 3] >> (i & 7)) & uint8_t(1)); + break; + case 1u: // 2 bits float + code = float((values[i >> 2] >> ((i & 3) << 1)) & uint8_t(3)); + break; + case 2u: // 4 bits float + code = float((values[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)); + break; + case 3u: // 8 bits float + code = float(values[i]); + break; + default: // 16 bits float + code = float(reinterpret_cast(values)[i]); } #endif - return float(code) * BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/UNITS + min + return float(code) * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/UNITS + min } /// @brief This class cannot be constructed or deleted @@ -4084,6 +5443,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + // Partial template specialization of LeafData with bool template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData @@ -4092,38 +5453,37 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) == sizeof(Mask), "Mismatching sizeof"); using ValueType = bool; using BuildType = bool; - using FloatType = bool;// dummy value type - using ArrayType = MaskT;// type used for the internal mValue array + using FloatType = bool; // dummy value type + using ArrayType = MaskT; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. 
- uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: is sparse ValueIndex, bits5,6,7: bit-width for FpN + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. MaskT mValues; // LOG2DIM(3): 64B. - uint64_t mPadding[2];// 16B padding to 32B alignment + uint64_t mPadding[2]; // 16B padding to 32B alignment - __hostdev__ static constexpr uint32_t padding() {return sizeof(LeafData) - 12u - 3u - 1u - 2*sizeof(MaskT) - 16u;} - __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() { return sizeof(LeafData) - 12u - 3u - 1u - 2 * sizeof(MaskT) - 16u; } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } - //__hostdev__ const ValueType* values() const { return nullptr; } __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); } - __hostdev__ bool getMin() const { return false; }// dummy - __hostdev__ bool getMax() const { return false; }// dummy - __hostdev__ bool getAvg() const { return false; }// dummy - __hostdev__ bool getDev() const { return false; }// dummy + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy __hostdev__ void setValue(uint32_t offset, bool v) { mValueMask.setOn(offset); mValues.set(offset, v); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const bool&) {} // no-op + __hostdev__ void setMax(const bool&) {} // no-op + __hostdev__ void setAvg(const bool&) {} // no-op + __hostdev__ void setDev(const bool&) {} // no-op - __hostdev__ void setMin(const bool&) {}// no-op - __hostdev__ void setMax(const bool&) {}// no-op - __hostdev__ void setAvg(const bool&) {}// no-op - __hostdev__ void 
setDev(const bool&) {}// no-op - - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } /// @brief This class cannot be constructed or deleted @@ -4133,6 +5493,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + // Partial template specialization of LeafData with ValueMask template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData @@ -4141,39 +5503,36 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) == sizeof(Mask), "Mismatching sizeof"); using ValueType = bool; using BuildType = ValueMask; - using FloatType = bool;// dummy value type - using ArrayType = void;// type used for the internal mValue array - void means missing + using FloatType = bool; // dummy value type + using ArrayType = void; // type used for the internal mValue array - void means missing static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: is sparse ValueIndex, bits5,6,7: bit-width for FpN + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. 
- uint64_t mPadding[2];// 16B padding to 32B alignment + uint64_t mPadding[2]; // 16B padding to 32B alignment __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } - __hostdev__ static constexpr uint32_t padding() { - return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2*8u); - } - - //__hostdev__ const ValueType* values() const { return nullptr; } - __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); } - __hostdev__ bool getMin() const { return false; }// dummy - __hostdev__ bool getMax() const { return false; }// dummy - __hostdev__ bool getAvg() const { return false; }// dummy - __hostdev__ bool getDev() const { return false; }// dummy - __hostdev__ void setValue(uint32_t offset, bool) + __hostdev__ static constexpr uint32_t padding() { - mValueMask.setOn(offset); + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); } - __hostdev__ void setMin(const ValueType&) {}// no-op - __hostdev__ void setMax(const ValueType&) {}// no-op - __hostdev__ void setAvg(const FloatType&) {}// no-op - __hostdev__ void setDev(const FloatType&) {}// no-op - - template + __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); } + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy + __hostdev__ void setValue(uint32_t offset, bool) { mValueMask.setOn(offset); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } /// @brief This class cannot be constructed or deleted @@ -4183,80 +5542,231 @@ struct 
NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafIndexBase <------------------------------------ + // Partial template specialization of LeafData with ValueIndex template class MaskT, uint32_t LOG2DIM> -struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafIndexBase { static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); using ValueType = uint64_t; - using BuildType = ValueIndex; using FloatType = uint64_t; - using ArrayType = void;// type used for the internal mValue array - void means missing + using ArrayType = void; // type used for the internal mValue array - void means missing static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: is sparse ValueIndex, bits5,6,7: bit-width for FpN - + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. 
- uint64_t mStatsOff;// 8B offset to min/max/avg/sdv - uint64_t mValueOff;// 8B offset to values - // No padding since it's always 32B aligned +#ifdef NANOVDB_USE_OLD_VALUE_ON_INDEX + uint64_t mOffset; // 8B offset to first value in this leaf node + union + { + uint8_t mCountOn[8]; + uint64_t mPrefixSum; + }; // prefix sum of active values per 64 bit words +#else + uint64_t mOffset, mPrefixSum; // 8B offset to first value in this leaf node and 9-bit prefix sum +#endif + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafIndexBase) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafIndexBase); } + __hostdev__ bool hasStats() const { return mFlags & (uint8_t(1) << 4); } + // return the offset to the first value indexed by this leaf node + __hostdev__ const uint64_t& firstOffset() const { return mOffset; } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } +}; // LeafIndexBase + +// --------------------------> LeafData <------------------------------------ + +// Partial template specialization of LeafData with ValueIndex +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueIndex; + // return the total number of values indexed by this leaf node, excluding the optional 4 stats + __hostdev__ static uint32_t valueCount() { return uint32_t(512); } // 8^3 = 2^9 + // return the offset to the last value indexed by this leaf node (disregarding optional stats) + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + 511u; } // 2^9 - 1 + // if stats are 
available, they are always placed after the last voxel value in this leaf node + __hostdev__ uint64_t getMin() const { return this->hasStats() ? BaseT::mOffset + 512u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? BaseT::mOffset + 513u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? BaseT::mOffset + 514u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? BaseT::mOffset + 515u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const { return BaseT::mOffset + i; } // dense leaf node with active and inactive voxels + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData - __hostdev__ static constexpr uint32_t padding() { - return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2*8u); +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueOnIndex; + __hostdev__ uint32_t valueCount() const + { +#ifdef NANOVDB_USE_OLD_VALUE_ON_INDEX + return BaseT::mCountOn[6] + ((uint32_t(BaseT::mCountOn[7] >> 6) & uint32_t(1)) << 8) + CountOn(BaseT::mValueMask.words()[7]); +#else + return CountOn(BaseT::mValueMask.words()[7]) + (BaseT::mPrefixSum >> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask +#endif } + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + this->valueCount() - 1u; } + __hostdev__ uint64_t getMin() const { return this->hasStats() ? this->lastOffset() + 1u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? this->lastOffset() + 2u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? 
this->lastOffset() + 3u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? this->lastOffset() + 4u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const + { +#if 0 // just for debugging + return mValueMask.isOn(i) ? mOffset + mValueMask.countOn(i) : 0u; +#else + uint32_t n = i >> 6; + const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); + if (!(w & mask)) + return uint64_t(0); // if i'th value is inactive return offset to background value + uint64_t sum = BaseT::mOffset + CountOn(w & (mask - 1u)); +#ifdef NANOVDB_USE_OLD_VALUE_ON_INDEX + if (n--) + sum += BaseT::mCountOn[n] + ((uint32_t(BaseT::mCountOn[7] >> n) & uint32_t(1)) << 8); // exclude first 64 voxels +#else + if (n--) + sum += BaseT::mPrefixSum >> (9u * n) & 511u; +#endif + return sum; +#endif + } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueIndexMask; + MaskT mMask; + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueOnIndexMask; + MaskT mMask; __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData + +// --------------------------> LeafData 
<------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = uint64_t; + using BuildType = Points; + using FloatType = typename FloatTraits::FloatType; + using ArrayType = uint16_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. - __hostdev__ uint64_t getMin() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 0; } - __hostdev__ uint64_t getMax() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 1; } - __hostdev__ uint64_t getAvg() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 2; } - __hostdev__ uint64_t getDev() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 3; } - __hostdev__ void setValue(uint32_t offset, uint64_t) + uint64_t mOffset; // 8B + uint64_t mPointCount; // 8B + alignas(32) uint16_t mValues[1u << 3 * LOG2DIM]; // 1KB + // no padding + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() { - mValueMask.setOn(offset); + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u + (1u << 3 * LOG2DIM) * 2u); } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } - __hostdev__ uint64_t getValue(uint32_t i) const + __hostdev__ uint64_t offset() const { return mOffset; } + __hostdev__ uint64_t pointCount() const { return mPointCount; } + __hostdev__ uint64_t first(uint32_t i) const { return i ? 
uint64_t(mValues[i - 1u]) + mOffset : mOffset; } + __hostdev__ uint64_t last(uint32_t i) const { return uint64_t(mValues[i]) + mOffset; } + __hostdev__ uint64_t getValue(uint32_t i) const { return uint64_t(mValues[i]); } + __hostdev__ void setValueOnly(uint32_t offset, uint16_t value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, uint16_t value) { - if (mFlags & uint8_t(16u)) {// if 4th bit is set only active voxels are indexed - return mValueMask.isOn(i) ? mValueOff + mValueMask.countOn(i) : 0;// 0 is background - } - return mValueOff + i;// dense array of active and inactive voxels + mValueMask.setOn(offset); + mValues[offset] = value; } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + + __hostdev__ ValueType getMin() const { return mOffset; } + __hostdev__ ValueType getMax() const { return mPointCount; } + __hostdev__ FloatType getAvg() const { return 0.0f; } + __hostdev__ FloatType getDev() const { return 0.0f; } + + __hostdev__ void setMin(const ValueType&) {} + __hostdev__ void setMax(const ValueType&) {} + __hostdev__ void setAvg(const FloatType&) {} + __hostdev__ void setDev(const FloatType&) {} - template - __hostdev__ void setMin(const T &min, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 0] = min; } - template - __hostdev__ void setMax(const T &max, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 1] = max; } - template - __hostdev__ void setAvg(const T &avg, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 2] = avg; } - template - __hostdev__ void setDev(const T &dev, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 3] = dev; } - template - __hostdev__ void setOrigin(const T &ijk) { mBBoxMin = ijk; } + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + //__hostdev__ void fill(const ValueType &v) {for (auto *p=mValues, *q=p+512; p!=q; ++p) *p = v;} /// @brief This class cannot be constructed or deleted LeafData() = delete; LeafData(const LeafData&) = delete; LeafData& 
operator=(const LeafData&) = delete; ~LeafData() = delete; -}; // LeafData +}; // LeafData + +// --------------------------> LeafNode <------------------------------------ /// @brief Leaf nodes of the VDB tree. (defaults to 8x8x8 = 512 voxels) template class MaskT = Mask, uint32_t Log2Dim = 3> -class LeafNode : private LeafData +class LeafNode : public LeafData { public: struct ChildNodeType { - static constexpr uint32_t TOTAL = 0; - static constexpr uint32_t DIM = 1; + static constexpr uint32_t TOTAL = 0; + static constexpr uint32_t DIM = 1; __hostdev__ static uint32_t dim() { return 1u; } }; // Voxel using LeafNodeType = LeafNode; @@ -4275,56 +5785,120 @@ class LeafNode : private LeafData class ValueOnIterator : public MaskIterT { using BaseT = MaskIterT; - const LeafNode *mParent; + const LeafNode* mParent; + public: - __hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {} - __hostdev__ ValueOnIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {} - ValueOnIterator& operator=(const ValueOnIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());} - __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } }; // Member class ValueOnIterator - ValueOnIterator beginValueOn() const {return ValueOnIterator(this);} + ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + 
ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } /// @brief Visits all inactive values in a leaf node class ValueOffIterator : public MaskIterT { using BaseT = MaskIterT; - const LeafNode *mParent; + const LeafNode* mParent; + public: - __hostdev__ ValueOffIterator() : BaseT(), mParent(nullptr) {} - __hostdev__ ValueOffIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOff()), mParent(parent) {} - ValueOffIterator& operator=(const ValueOffIterator&) = default; - __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());} - __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + __hostdev__ ValueOffIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOffIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOff()) + , mParent(parent) + { + } + ValueOffIterator& operator=(const ValueOffIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } }; // Member class ValueOffIterator - ValueOffIterator beginValueOff() const {return ValueOffIterator(this);} + ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } + ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values class ValueIterator { - const LeafNode *mParent; - uint32_t mPos; + const LeafNode* mParent; + uint32_t mPos; + public: - __hostdev__ ValueIterator() : mParent(nullptr), mPos(1u << 3 * Log2Dim) {} - __hostdev__ ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} - ValueIterator& operator=(const ValueIterator&) = default; - __hostdev__ ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->getValue(mPos);} - __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} - __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);} - __hostdev__ operator bool() const {return mPos < (1u << 3 * Log2Dim);} - __hostdev__ ValueIterator& operator++() {++mPos; return *this;} - __hostdev__ ValueIterator operator++(int) { + __hostdev__ ValueIterator() + : mParent(nullptr) + , mPos(1u << 3 * Log2Dim) + { + } + __hostdev__ ValueIterator(const LeafNode* parent) + : mParent(parent) + , mPos(0) + { + NANOVDB_ASSERT(parent); + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(mPos); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(mPos); + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->isActive(mPos); + } + __hostdev__ operator bool() const { return mPos < (1u << 3 * Log2Dim); } + __hostdev__ ValueIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ ValueIterator operator++(int) + { auto tmp = *this; ++(*this); return tmp; } }; // Member class ValueIterator - ValueIterator beginValue() const {return ValueIterator(this);} + ValueIterator beginValue() const { return ValueIterator(this); } + ValueIterator cbeginValueAll() const { return ValueIterator(this); } - static_assert(is_same::Type>::value, 
"Mismatching BuildType"); + static_assert(is_same::Type>::value, "Mismatching BuildType"); static constexpr uint32_t LOG2DIM = Log2Dim; static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node @@ -4339,18 +5913,19 @@ class LeafNode : private LeafData /// @brief Return a const reference to the bit mask of active voxels in this leaf node __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } /// @brief Return a const reference to the minimum active value encoded in this leaf node - __hostdev__ ValueType minimum() const { return this->getMin(); } + __hostdev__ ValueType minimum() const { return DataType::getMin(); } /// @brief Return a const reference to the maximum active value encoded in this leaf node - __hostdev__ ValueType maximum() const { return this->getMax(); } + __hostdev__ ValueType maximum() const { return DataType::getMax(); } /// @brief Return a const reference to the average of all the active values encoded in this leaf node __hostdev__ FloatType average() const { return DataType::getAvg(); } /// @brief Return the variance of all the active values encoded in this leaf node - __hostdev__ FloatType variance() const { return DataType::getDev()*DataType::getDev(); } + __hostdev__ FloatType variance() const { return Pow2(DataType::getDev()); } /// @brief Return a const reference to the standard deviation of all the active values encoded in this leaf node __hostdev__ FloatType stdDeviation() const { return DataType::getDev(); } @@ -4360,6 +5935,9 @@ class LeafNode : private LeafData /// @brief Return the origin in index space of this leaf node __hostdev__ CoordT origin() const { return DataType::mBBoxMin & ~MASK; } + /// @brief Compute the local coordinates from a linear offset + /// @param n Linear offset into this nodes dense table + /// @return 
Local (vs global) 3D coordinates __hostdev__ static CoordT OffsetToLocalCoord(uint32_t n) { NANOVDB_ASSERT(n < SIZE); @@ -4382,12 +5960,12 @@ class LeafNode : private LeafData __hostdev__ BBox bbox() const { BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); - if ( this->hasBBox() ) { + if (this->hasBBox()) { bbox.max()[0] += DataType::mBBoxDif[0]; bbox.max()[1] += DataType::mBBoxDif[1]; bbox.max()[2] += DataType::mBBoxDif[2]; - } else {// very rare case - bbox = BBox();// invalid + } else { // very rare case + bbox = BBox(); // invalid } return bbox; } @@ -4395,7 +5973,7 @@ class LeafNode : private LeafData /// @brief Return the total number of voxels (e.g. values) encoded in this leaf node __hostdev__ static uint32_t voxelCount() { return 1u << (3 * LOG2DIM); } - __hostdev__ static uint32_t padding() {return DataType::padding();} + __hostdev__ static uint32_t padding() { return DataType::padding(); } /// @brief return memory usage in bytes for the class __hostdev__ uint64_t memUsage() { return DataType::memUsage(); } @@ -4412,6 +5990,11 @@ class LeafNode : private LeafData /// @brief Return the voxel value at the given coordinate. __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); } + /// @brief Return the first value in this leaf node. + __hostdev__ ValueType getFirstValue() const { return this->getValue(0); } + /// @brief Return the last value in this leaf node. + __hostdev__ ValueType getLastValue() const { return this->getValue(SIZE - 1); } + /// @brief Sets the value at the specified location and activate its state. 
/// /// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes) @@ -4435,13 +6018,13 @@ class LeafNode : private LeafData return !DataType::mValueMask.isOff(); } - __hostdev__ bool hasBBox() const {return DataType::mFlags & uint8_t(2);} + __hostdev__ bool hasBBox() const { return DataType::mFlags & uint8_t(2); } /// @brief Return @c true if the voxel value at the given coordinate is active and updates @c v with the value. __hostdev__ bool probeValue(const CoordT& ijk, ValueType& v) const { const uint32_t n = CoordToOffset(ijk); - v = DataType::getValue(n); + v = DataType::getValue(n); return DataType::mValueMask.isOn(n); } @@ -4450,11 +6033,11 @@ class LeafNode : private LeafData /// @brief Return the linear offset corresponding to the given coordinate __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk) { - #if 0 +#if 0 return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); - #else +#else return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK); - #endif +#endif } /// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated. @@ -4462,10 +6045,34 @@ class LeafNode : private LeafData /// @warning It assumes that the origin and value mask have already been set. /// /// @details This method is based on few (intrinsic) bit operations and hence is relatively fast. - /// However, it should only only be called of either the value mask has changed or if the + /// However, it should only only be called if either the value mask has changed or if the /// active bounding box is still undefined. e.g. during construction of this node. __hostdev__ bool updateBBox(); + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto get(const uint32_t n, ArgsT&&... 
args) const + { + return OpT::get(*this, n, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto set(const uint32_t n, ArgsT&&... args) + { + return OpT::set(*this, n, args...); + } + private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned"); //static_assert(offsetof(DataType, mValues) % 32 == 0, "LeafData::mValues is misaligned"); @@ -4478,16 +6085,17 @@ class LeafNode : private LeafData template friend class InternalNode; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private method to return a voxel value and update a (dummy) ReadAccessor template __hostdev__ ValueType getValueAndCache(const CoordT& ijk, const AccT&) const { return this->getValue(ijk); } /// @brief Return the node information. template - __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const { + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const + { using NodeInfoT = typename AccT::NodeInfo; - return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), - this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; } template @@ -4498,24 +6106,44 @@ class LeafNode : private LeafData template __hostdev__ const LeafNode* probeLeafAndCache(const CoordT&, const AccT&) const { return this; } +#endif template __hostdev__ uint32_t getDimAndCache(const CoordT&, const RayT& /*ray*/, const AccT&) const { - if (DataType::mFlags & uint8_t(1u)) return this->dim(); // skip this node if the 1st bit is set + if (DataType::mFlags & uint8_t(1u)) + return this->dim(); // skip this node if the 1st bit is set //if (!ray.intersects( this->bbox() )) return 1 << 
LOG2DIM; return ChildNodeType::dim(); } + template + __hostdev__ auto + //__hostdev__ decltype(OpT::get(std::declval(), std::declval(), std::declval()...)) + getAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + //__hostdev__ auto // occationally fails with NVCC + __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) + setAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + }; // LeafNode class +// --------------------------> LeafNode::updateBBox <------------------------------------ + template class MaskT, uint32_t LOG2DIM> __hostdev__ inline bool LeafNode::updateBBox() { static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!"); if (DataType::mValueMask.isOff()) { - DataType::mFlags &= ~uint8_t(2);// set 2nd bit off, which indicates that this nodes has no bbox + DataType::mFlags &= ~uint8_t(2); // set 2nd bit off, which indicates that this nodes has no bbox return false; } auto update = [&](uint32_t min, uint32_t max, int axis) { @@ -4523,15 +6151,13 @@ __hostdev__ inline bool LeafNode::updateBBox() DataType::mBBoxMin[axis] = (DataType::mBBoxMin[axis] & ~MASK) + int(min); DataType::mBBoxDif[axis] = uint8_t(max - min); }; - uint64_t word64 = DataType::mValueMask.template getWord(0); - uint32_t Xmin = word64 ? 0u : 8u; - uint32_t Xmax = Xmin; - for (int i = 1; i < 8; ++i) { // last loop over 8 64 words - if (uint64_t w = DataType::mValueMask.template getWord(i)) { // skip if word has no set bits - word64 |= w; // union 8 x 64 bits words into one 64 bit word - if (Xmin == 8) { + uint64_t *w = DataType::mValueMask.words(), word64 = *w; + uint32_t Xmin = word64 ? 
0u : 8u, Xmax = Xmin; + for (int i = 1; i < 8; ++i) { // last loop over 8 64 bit words + if (w[i]) { // skip if word has no set bits + word64 |= w[i]; // union 8 x 64 bits words into one 64 bit word + if (Xmin == 8) Xmin = i; // only set once - } Xmax = i; } } @@ -4540,10 +6166,10 @@ __hostdev__ inline bool LeafNode::updateBBox() update(FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1); const uint32_t *p = reinterpret_cast(&word64), word32 = p[0] | p[1]; const uint16_t *q = reinterpret_cast(&word32), word16 = q[0] | q[1]; - const uint8_t *b = reinterpret_cast(&word16), byte = b[0] | b[1]; + const uint8_t * b = reinterpret_cast(&word16), byte = b[0] | b[1]; NANOVDB_ASSERT(byte); update(FindLowestOn(static_cast(byte)), FindHighestOn(static_cast(byte)), 2); - DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox + DataType::mFlags |= uint8_t(2); // set 2nd bit on, which indicates that this nodes has a bbox return true; } // LeafNode::updateBBox @@ -4594,33 +6220,47 @@ struct NanoNode using type = NanoRoot; }; -using FloatTree = NanoTree; +using FloatTree = NanoTree; +using Fp4Tree = NanoTree; +using Fp8Tree = NanoTree; +using Fp16Tree = NanoTree; +using FpNTree = NanoTree; using DoubleTree = NanoTree; -using Int32Tree = NanoTree; +using Int32Tree = NanoTree; using UInt32Tree = NanoTree; -using Int64Tree = NanoTree; -using Vec3fTree = NanoTree; -using Vec3dTree = NanoTree; -using Vec4fTree = NanoTree; -using Vec4dTree = NanoTree; -using Vec3ITree = NanoTree; -using MaskTree = NanoTree; -using IndexTree = NanoTree; -using BoolTree = NanoTree; - -using FloatGrid = Grid; +using Int64Tree = NanoTree; +using Vec3fTree = NanoTree; +using Vec3dTree = NanoTree; +using Vec4fTree = NanoTree; +using Vec4dTree = NanoTree; +using Vec3ITree = NanoTree; +using MaskTree = NanoTree; +using BoolTree = NanoTree; +using IndexTree = NanoTree; +using OnIndexTree = NanoTree; +using IndexMaskTree = NanoTree; +using OnIndexMaskTree = NanoTree; + 
+using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; using DoubleGrid = Grid; -using Int32Grid = Grid; +using Int32Grid = Grid; using UInt32Grid = Grid; -using Int64Grid = Grid; -using Vec3fGrid = Grid; -using Vec3dGrid = Grid; -using Vec4fGrid = Grid; -using Vec4dGrid = Grid; -using Vec3IGrid = Grid; -using MaskGrid = Grid; -using IndexGrid = Grid; -using BoolGrid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using Vec3IGrid = Grid; +using MaskGrid = Grid; +using BoolGrid = Grid; +using IndexGrid = Grid; +using OnIndexGrid = Grid; +using IndexMaskGrid = Grid; +using OnIndexMaskGrid = Grid; // --------------------------> ReadAccessor <------------------------------------ @@ -4643,24 +6283,26 @@ using BoolGrid = Grid; /// O(1) random access operations by means of inverse tree traversal, /// which amortizes the non-const time complexity of the root node. 
-template +template class ReadAccessor { - using GridT = NanoGrid;// grid - using TreeT = NanoTree;// tree - using RootT = NanoRoot; // root node - using LeafT = NanoLeaf; // Leaf node + using GridT = NanoGrid; // grid + using TreeT = NanoTree; // tree + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node using FloatType = typename RootT::FloatType; using CoordValueType = typename RootT::CoordType::ValueType; mutable const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const) public: + using BuildType = BuildT; using ValueType = typename RootT::ValueType; using CoordType = typename RootT::CoordType; static const int CacheLevels = 0; - - struct NodeInfo { +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + struct NodeInfo + { uint32_t mLevel; // 4B uint32_t mDim; // 4B ValueType mMinimum; // typically 4B @@ -4670,15 +6312,24 @@ class ReadAccessor CoordType mBBoxMin; // 3*4B CoordType mBBoxMax; // 3*4B }; - +#endif /// @brief Constructor from a root node - __hostdev__ ReadAccessor(const RootT& root) : mRoot{&root} {} + __hostdev__ ReadAccessor(const RootT& root) + : mRoot{&root} + { + } - /// @brief Constructor from a grid - __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {} + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } /// @brief Constructor from a tree - __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {} + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } /// @brief Reset this access to its initial state, i.e. 
with an empty cache /// @node Noop since this template specialization has no cache @@ -4690,18 +6341,34 @@ class ReadAccessor ReadAccessor(const ReadAccessor&) = default; ~ReadAccessor() = default; ReadAccessor& operator=(const ReadAccessor&) = default; - +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { return mRoot->getValueAndCache(ijk, *this); } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->getValue(ijk); } __hostdev__ ValueType operator()(int i, int j, int k) const { - return this->getValue(CoordType(i,j,k)); + return this->getValue(CoordType(i, j, k)); } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const @@ -4723,12 +6390,23 @@ class ReadAccessor { return mRoot->probeLeafAndCache(ijk, *this); } - +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const 
CoordType& ijk, ArgsT&&... args) const + { + return mRoot->template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { + return const_cast(mRoot)->template set(ijk, args...); + } private: /// @brief Allow nodes to insert themselves into the cache. @@ -4745,16 +6423,16 @@ class ReadAccessor }; // ReadAccessor class /// @brief Node caching at a single tree level -template -class ReadAccessor//e.g. 0, 1, 2 +template +class ReadAccessor //e.g. 0, 1, 2 { static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 should be 0, 1, or 2"); - using GridT = NanoGrid;// grid - using TreeT = NanoTree; - using RootT = NanoRoot; // root node - using LeafT = NanoLeaf; // Leaf node - using NodeT = typename NodeTrait::type; + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node + using NodeT = typename NodeTrait::type; using CoordT = typename RootT::CoordType; using ValueT = typename RootT::ValueType; @@ -4767,13 +6445,14 @@ class ReadAccessor//e.g. 0, 1, 2 mutable const NodeT* mNode; // 8 bytes public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 1; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS using NodeInfo = typename ReadAccessor::NodeInfo; - +#endif /// @brief Constructor from a root node __hostdev__ ReadAccessor(const RootT& root) : mKey(CoordType::max()) @@ -4783,10 +6462,16 @@ class ReadAccessor//e.g. 0, 1, 2 } /// @brief Constructor from a grid - __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {} + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } /// @brief Constructor from a tree - __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {} + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } /// @brief Reset this access to its initial state, i.e. 
with an empty cache __hostdev__ void clear() @@ -4809,63 +6494,90 @@ class ReadAccessor//e.g. 0, 1, 2 (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2]; } +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->getValueAndCache(ijk, *this); - } return mRoot->getValueAndCache(ijk, *this); } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->getValue(ijk); } __hostdev__ ValueType operator()(int i, int j, int k) const { - return this->getValue(CoordType(i,j,k)); + return this->getValue(CoordType(i, j, k)); } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->getNodeInfoAndCache(ijk, *this); - } return mRoot->getNodeInfoAndCache(ijk, *this); } __hostdev__ bool isActive(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->isActiveAndCache(ijk, 
*this); - } return mRoot->isActiveAndCache(ijk, *this); } __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->probeValueAndCache(ijk, v, *this); - } return mRoot->probeValueAndCache(ijk, v, *this); } __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->probeLeafAndCache(ijk, *this); - } return mRoot->probeLeafAndCache(ijk, *this); } - +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->getDimAndCache(ijk, ray, *this); - } return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + if (this->isCached(ijk)) + return mNode->template getAndCache(ijk, *this, args...); + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { + if (this->isCached(ijk)) + return const_cast(mNode)->template setAndCache(ijk, *this, args...); + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + private: /// @brief Allow nodes to insert themselves into the cache. template @@ -4888,16 +6600,16 @@ class ReadAccessor//e.g. 0, 1, 2 }; // ReadAccessor -template -class ReadAccessor//e.g. (0,1), (1,2), (0,2) +template +class ReadAccessor //e.g. 
(0,1), (1,2), (0,2) { static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 must be 0, 1, 2"); static_assert(LEVEL1 >= 0 && LEVEL1 <= 2, "LEVEL1 must be 0, 1, 2"); static_assert(LEVEL0 < LEVEL1, "Level 0 must be lower than level 1"); - using GridT = NanoGrid;// grid - using TreeT = NanoTree; - using RootT = NanoRoot; - using LeafT = NanoLeaf; + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; + using LeafT = NanoLeaf; using Node1T = typename NodeTrait::type; using Node2T = typename NodeTrait::type; using CoordT = typename RootT::CoordType; @@ -4906,7 +6618,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) using CoordValueType = typename RootT::CoordT::ValueType; // All member data are mutable to allow for access methods to be const -#ifdef USE_SINGLE_ACCESSOR_KEY // 44 bytes total +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total mutable CoordT mKey; // 3*4 = 12 bytes #else // 68 bytes total mutable CoordT mKeys[2]; // 2*3*4 = 24 bytes @@ -4916,16 +6628,17 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) mutable const Node2T* mNode2; public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 2; - - using NodeInfo = typename ReadAccessor::NodeInfo; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + using NodeInfo = typename ReadAccessor::NodeInfo; +#endif /// @brief Constructor from a root node __hostdev__ ReadAccessor(const RootT& root) -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY : mKey(CoordType::max()) #else : mKeys{CoordType::max(), CoordType::max()} @@ -4936,16 +6649,22 @@ class ReadAccessor//e.g. 
(0,1), (1,2), (0,2) { } - /// @brief Constructor from a grid - __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {} + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } /// @brief Constructor from a tree - __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {} + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } /// @brief Reset this access to its initial state, i.e. with an empty cache __hostdev__ void clear() { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = CoordType::max(); #else mKeys[0] = mKeys[1] = CoordType::max(); @@ -4961,7 +6680,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) ~ReadAccessor() = default; ReadAccessor& operator=(const ReadAccessor&) = default; -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY __hostdev__ bool isCached1(CoordValueType dirty) const { if (!mNode1) @@ -5001,9 +6720,23 @@ class ReadAccessor//e.g. 
(0,1), (1,2), (0,2) } #endif +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5021,12 +6754,15 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } __hostdev__ ValueType operator()(int i, int j, int k) const { - return this->getValue(CoordType(i,j,k)); + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); } - __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5041,7 +6777,7 @@ class ReadAccessor//e.g. 
(0,1), (1,2), (0,2) __hostdev__ bool isActive(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5056,7 +6792,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5071,7 +6807,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5083,11 +6819,12 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } return mRoot->probeLeafAndCache(ijk, *this); } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5100,6 +6837,38 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->template getAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return mNode2->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... 
args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return const_cast(mNode1)->template setAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return const_cast(mNode2)->template setAndCache(ijk, *this, args...); + } + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + private: /// @brief Allow nodes to insert themselves into the cache. template @@ -5112,7 +6881,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) /// @brief Inserts a leaf node and key pair into this ReadAccessor __hostdev__ void insert(const CoordType& ijk, const Node1T* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[0] = ijk & ~Node1T::MASK; @@ -5121,28 +6890,27 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } __hostdev__ void insert(const CoordType& ijk, const Node2T* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[1] = ijk & ~Node2T::MASK; #endif mNode2 = node; } - template + template __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {} }; // ReadAccessor - /// @brief Node caching at all (three) tree levels -template +template class ReadAccessor { - using GridT = NanoGrid;// grid - using TreeT = NanoTree; - using RootT = NanoRoot; // root node + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node using NodeT2 = NanoUpper; // upper internal node using NodeT1 = NanoLower; // lower internal node - using LeafT = NanoLeaf< BuildT>; // Leaf node + using LeafT = NanoLeaf; // Leaf node using CoordT = typename RootT::CoordType; using ValueT = typename RootT::ValueType; @@ -5150,25 +6918,26 @@ class ReadAccessor using CoordValueType = typename RootT::CoordT::ValueType; // All member data are mutable to allow for access methods to be const -#ifdef 
USE_SINGLE_ACCESSOR_KEY // 44 bytes total +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total mutable CoordT mKey; // 3*4 = 12 bytes #else // 68 bytes total mutable CoordT mKeys[3]; // 3*3*4 = 36 bytes #endif mutable const RootT* mRoot; - mutable const void* mNode[3]; // 4*8 = 32 bytes + mutable const void* mNode[3]; // 4*8 = 32 bytes public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 3; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS using NodeInfo = typename ReadAccessor::NodeInfo; - +#endif /// @brief Constructor from a root node __hostdev__ ReadAccessor(const RootT& root) -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY : mKey(CoordType::max()) #else : mKeys{CoordType::max(), CoordType::max(), CoordType::max()} @@ -5178,11 +6947,17 @@ class ReadAccessor { } - /// @brief Constructor from a grid - __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {} + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } /// @brief Constructor from a tree - __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {} + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } __hostdev__ const RootT& root() const { return *mRoot; } @@ -5202,19 +6977,18 @@ class ReadAccessor return reinterpret_cast(mNode[NodeT::LEVEL]); } - template + template __hostdev__ const typename NodeTrait::type* getNode() const { using T = typename NodeTrait::type; - static_assert(LEVEL>=0 && LEVEL<=2, "ReadAccessor::getNode: Invalid node type"); + static_assert(LEVEL >= 0 && LEVEL <= 2, "ReadAccessor::getNode: Invalid node type"); return reinterpret_cast(mNode[LEVEL]); } - /// @brief Reset this access to its initial state, i.e. 
with an empty cache __hostdev__ void clear() { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = CoordType::max(); #else mKeys[0] = mKeys[1] = mKeys[2] = CoordType::max(); @@ -5222,7 +6996,7 @@ class ReadAccessor mNode[0] = mNode[1] = mNode[2] = nullptr; } -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY template __hostdev__ bool isCached(CoordValueType dirty) const { @@ -5243,13 +7017,29 @@ class ReadAccessor template __hostdev__ bool isCached(const CoordType& ijk) const { - return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; + return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; } #endif +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType 
dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5269,12 +7059,16 @@ class ReadAccessor } __hostdev__ ValueType operator()(int i, int j, int k) const { - return this->getValue(CoordType(i,j,k)); + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5291,7 +7085,7 @@ class ReadAccessor __hostdev__ bool isActive(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5308,7 +7102,7 @@ class ReadAccessor __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5322,10 +7116,9 @@ class ReadAccessor } return mRoot->probeValueAndCache(ijk, v, *this); } - __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5339,11 +7132,48 @@ class ReadAccessor } return mRoot->probeLeafAndCache(ijk, *this); } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((const LeafT*)mNode[0])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return ((RootT*)mRoot)->template setAndCache(ijk, *this, args...); + } template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -5371,7 +7201,7 @@ class ReadAccessor template __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; @@ -5394,20 +7224,20 @@ class ReadAccessor /// createAccessor<1,2>(grid): Caching of lower and upper internal nodes /// createAccessor<0,1,2>(grid): Caching of all nodes at all tree levels -template -ReadAccessor createAccessor(const NanoGrid &grid) +template +ReadAccessor createAccessor(const NanoGrid& grid) { return 
ReadAccessor(grid); } -template -ReadAccessor createAccessor(const NanoTree &tree) +template +ReadAccessor createAccessor(const NanoTree& tree) { return ReadAccessor(tree); } -template -ReadAccessor createAccessor(const NanoRoot &root) +template +ReadAccessor createAccessor(const NanoRoot& root) { return ReadAccessor(root); } @@ -5419,7 +7249,72 @@ ReadAccessor createAccessor(const NanoRoot + GridMetaData(const NanoGrid& grid) + { + mGridData = *grid.data(); + mTreeData = *grid.tree().data(); + mIndexBBox = grid.indexBBox(); + mRootTableSize = grid.tree().root().getTableSize(); + } + GridMetaData(const uint8_t* buffer) + { + auto* grid = reinterpret_cast*>(buffer); // dummy grid type + NANOVDB_ASSERT(grid && grid->isValid()); + mGridData = *grid->data(); + mTreeData = *grid->tree().data(); + mIndexBBox = grid->indexBBox(); + mRootTableSize = grid->tree().root().getTableSize(); + } + __hostdev__ bool safeCast() const { return mTreeData.mNodeOffset[3] == sizeof(TreeData); } + template + __hostdev__ static bool safeCast(const NanoGrid& grid) + { // the RootData follows right after the TreeData + return grid.tree().data()->mNodeOffset[3] == sizeof(TreeData); + } + __hostdev__ bool isValid() const { return mGridData.mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } + __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } + __hostdev__ bool isLevelSet() const { return mGridData.mGridClass == GridClass::LevelSet; } + __hostdev__ bool isFogVolume() const { return mGridData.mGridClass == GridClass::FogVolume; } + __hostdev__ bool isStaggered() const { return mGridData.mGridClass == GridClass::Staggered; } + __hostdev__ bool isPointIndex() const { return mGridData.mGridClass == GridClass::PointIndex; } + __hostdev__ bool isGridIndex() const { return mGridData.mGridClass == GridClass::IndexGrid; } + __hostdev__ bool isPointData() const { return mGridData.mGridClass == GridClass::PointData; 
} + __hostdev__ bool isMask() const { return mGridData.mGridClass == GridClass::Topology; } + __hostdev__ bool isUnknown() const { return mGridData.mGridClass == GridClass::Unknown; } + __hostdev__ bool hasMinMax() const { return mGridData.mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return mGridData.mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return mGridData.mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } + __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } + __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } + __hostdev__ const char* shortGridName() const { return mGridData.mGridName; } + __hostdev__ const Map& map() const { return mGridData.mMap; } + __hostdev__ const BBox& worldBBox() const { return mGridData.mWorldBBox; } + __hostdev__ const BBox& indexBBox() const { return mIndexBBox; } + __hostdev__ Vec3d voxelSize() const { return mGridData.mVoxelSize; } + __hostdev__ int blindDataCount() const { return mGridData.mBlindMetadataCount; } + //__hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const {return *mGridData.blindMetaData(n);} + __hostdev__ uint64_t activeVoxelCount() const { return mTreeData.mVoxelCount; } + __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return mTreeData.mTileCount[level - 1]; } + __hostdev__ uint32_t nodeCount(uint32_t level) const { return mTreeData.mNodeCount[level]; } + __hostdev__ uint64_t checksum() const { return mGridData.mChecksum; } + __hostdev__ uint32_t rootTableSize() const { return 
mRootTableSize; } + __hostdev__ bool isEmpty() const { return mRootTableSize == 0; } + __hostdev__ Version version() const { return mGridData.mVersion; } +#else // We cast to a grid templated on a dummy ValueType which is safe because we are very // careful only to call certain methods which are known to be invariant to the ValueType! // In other words, don't use this technique unless you are intimately familiar with the @@ -5443,11 +7338,11 @@ class GridMetaData __hostdev__ bool isMask() const { return this->grid().isMask(); } __hostdev__ bool isStaggered() const { return this->grid().isStaggered(); } __hostdev__ bool isUnknown() const { return this->grid().isUnknown(); } - __hostdev__ const Map& map() const { return this->grid().map(); } - __hostdev__ const BBox& worldBBox() const { return this->grid().worldBBox(); } - __hostdev__ const BBox& indexBBox() const { return this->grid().indexBBox(); } - __hostdev__ Vec3R voxelSize() const { return this->grid().voxelSize(); } - __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); } + __hostdev__ const Map& map() const { return this->grid().map(); } + __hostdev__ const BBox& worldBBox() const { return this->grid().worldBBox(); } + __hostdev__ const BBox& indexBBox() const { return this->grid().indexBBox(); } + __hostdev__ Vec3d voxelSize() const { return this->grid().voxelSize(); } + __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); } __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return this->grid().blindMetaData(n); } __hostdev__ uint64_t activeVoxelCount() const { return this->grid().activeVoxelCount(); } __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return this->grid().tree().activeTileCount(level); } @@ -5455,34 +7350,40 @@ class GridMetaData __hostdev__ uint64_t checksum() const { return this->grid().checksum(); } __hostdev__ bool isEmpty() const { return this->grid().isEmpty(); } __hostdev__ Version version() 
const { return this->grid().version(); } +#endif }; // GridMetaData /// @brief Class to access points at a specific voxel location -template -class PointAccessor : public DefaultReadAccessor +/// +/// @note If GridClass::PointIndex AttT should be uint32_t and if GridClass::PointData Vec3f +template +class PointAccessor : public DefaultReadAccessor { - using AccT = DefaultReadAccessor; - const UInt32Grid* mGrid; - const AttT* mData; + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; public: - using LeafNodeType = typename NanoRoot::LeafNodeType; - - PointAccessor(const UInt32Grid& grid) + PointAccessor(const NanoGrid& grid) : AccT(grid.tree().root()) - , mGrid(&grid) - , mData(reinterpret_cast(grid.blindData(0))) + , mGrid(grid) + , mData(grid.template getBlindData(0)) { - NANOVDB_ASSERT(grid.gridType() == GridType::UInt32); + NANOVDB_ASSERT(grid.gridType() == mapToGridType()); NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || (grid.gridClass() == GridClass::PointData && is_same::value)); - NANOVDB_ASSERT(grid.blindDataCount() >= 1); } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + /// @brief Return the total number of point in the grid and set the /// iterators to the complete range of points. 
__hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const { - const uint64_t count = mGrid->blindMetaData(0u).mElementCount; + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; begin = mData; end = begin + count; return count; @@ -5501,100 +7402,165 @@ class PointAccessor : public DefaultReadAccessor return leaf->maximum(); } - /// @brief get iterators over offsets to points at a specific voxel location + /// @brief get iterators over attributes to points at a specific voxel location __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + begin = end = nullptr; + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t offset = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(offset)) { + begin = mData + leaf->minimum(); + end = begin + leaf->getValue(offset); + if (offset > 0u) + begin += leaf->getValue(offset - 1); + } + } + return end - begin; + } +}; // PointAccessor + +template +class PointAccessor : public DefaultReadAccessor +{ + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; + +public: + PointAccessor(const NanoGrid& grid) + : AccT(grid.tree().root()) + , mGrid(grid) + , mData(grid.template getBlindData(0)) + { + NANOVDB_ASSERT(mData); + NANOVDB_ASSERT(grid.gridType() == GridType::PointIndex); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || + (grid.gridClass() == GridClass::PointData && is_same::value)); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return the total number of point in the grid and set the + /// iterators to the complete range of points. 
+ __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const + { + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; + begin = mData; + end = begin + count; + return count; + } + /// @brief Return the number of points in the leaf node containing the coordinate @a ijk. + /// If this return value is larger than zero then the iterators @a begin and @a end + /// will point to all the attributes contained within that leaf node. + __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const { auto* leaf = this->probeLeaf(ijk); if (leaf == nullptr) return 0; - const uint32_t offset = LeafNodeType::CoordToOffset(ijk); - if (leaf->isActive(offset)) { - auto* p = mData + leaf->minimum(); - begin = p + (offset == 0 ? 0 : leaf->getValue(offset - 1)); - end = p + leaf->getValue(offset); - return end - begin; + begin = mData + leaf->offset(); + end = begin + leaf->pointCount(); + return leaf->pointCount(); + } + + /// @brief get iterators over attributes to points at a specific voxel location + __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t n = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(n)) { + begin = mData + leaf->first(n); + end = mData + leaf->last(n); + return end - begin; + } } - return 0; + begin = end = nullptr; + return 0u; // no leaf or inactive voxel } -}; // PointAccessor +}; // PointAccessor /// @brief Class to access values in channels at a specific voxel location. /// /// @note The ChannelT template parameter can be either const and non-const. 
-template -class ChannelAccessor : public DefaultReadAccessor +template +class ChannelAccessor : public DefaultReadAccessor { - using BaseT = DefaultReadAccessor; - const IndexGrid &mGrid; - ChannelT *mChannel; + static_assert(BuildTraits::is_index, "Expected an index build type"); + using BaseT = DefaultReadAccessor; + + const NanoGrid& mGrid; + ChannelT* mChannel; public: using ValueType = ChannelT; - using TreeType = IndexTree; - using AccessorType = ChannelAccessor; + using TreeType = NanoTree; + using AccessorType = ChannelAccessor; /// @brief Ctor from an IndexGrid and an integer ID of an internal channel /// that is assumed to exist as blind data in the IndexGrid. - __hostdev__ ChannelAccessor(const IndexGrid& grid, uint32_t channelID = 0u) + __hostdev__ ChannelAccessor(const NanoGrid& grid, uint32_t channelID = 0u) : BaseT(grid.tree().root()) , mGrid(grid) , mChannel(nullptr) { - NANOVDB_ASSERT(grid.gridType() == GridType::Index); + NANOVDB_ASSERT(isIndex(grid.gridType())); NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); this->setChannel(channelID); } /// @brief Ctor from an IndexGrid and an external channel - __hostdev__ ChannelAccessor(const IndexGrid& grid, ChannelT *channelPtr) + __hostdev__ ChannelAccessor(const NanoGrid& grid, ChannelT* channelPtr) : BaseT(grid.tree().root()) , mGrid(grid) , mChannel(channelPtr) { - NANOVDB_ASSERT(grid.gridType() == GridType::Index); + NANOVDB_ASSERT(isIndex(grid.gridType())); NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); - NANOVDB_ASSERT(mChannel); } + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mChannel != nullptr; } + /// @brief Return a const reference to the IndexGrid - __hostdev__ const IndexGrid &grid() const {return mGrid;} + __hostdev__ const NanoGrid& grid() const { return mGrid; } /// @brief Return a const reference to the tree of the IndexGrid - __hostdev__ const IndexTree &tree() const {return mGrid.tree();} + 
__hostdev__ const TreeType& tree() const { return mGrid.tree(); } /// @brief Return a vector of the axial voxel sizes - __hostdev__ const Vec3R& voxelSize() const { return mGrid.voxelSize(); } + __hostdev__ const Vec3d& voxelSize() const { return mGrid.voxelSize(); } /// @brief Return total number of values indexed by the IndexGrid __hostdev__ const uint64_t& valueCount() const { return mGrid.valueCount(); } /// @brief Change to an external channel - __hostdev__ void setChannel(ChannelT *channelPtr) - { - mChannel = channelPtr; - NANOVDB_ASSERT(mChannel); - } + /// @return Pointer to channel data + __hostdev__ ChannelT* setChannel(ChannelT* channelPtr) {return mChannel = channelPtr;} - /// @brief Change to an internal channel, assuming it exists as as blind data - /// in the IndexGrid. - __hostdev__ void setChannel(uint32_t channelID) + /// @brief Change to an internal channel, assuming it exists as as blind data + /// in the IndexGrid. + /// @return Pointer to channel data, which could be NULL if channelID is out of range or + /// if ChannelT does not match the value type of the blind data + __hostdev__ ChannelT* setChannel(uint32_t channelID) { - this->setChannel(reinterpret_cast(const_cast(mGrid.blindData(channelID)))); + return mChannel = const_cast(mGrid.template getBlindData(channelID)); } /// @brief Return the linear offset into a channel that maps to the specified coordinate - __hostdev__ uint64_t getIndex(const Coord& ijk) const {return BaseT::getValue(ijk);} - __hostdev__ uint64_t idx(int i, int j, int k) const {return BaseT::getValue(Coord(i,j,k));} + __hostdev__ uint64_t getIndex(const Coord& ijk) const { return BaseT::getValue(ijk); } + __hostdev__ uint64_t idx(int i, int j, int k) const { return BaseT::getValue(Coord(i, j, k)); } /// @brief Return the value from a cached channel that maps to the specified coordinate - __hostdev__ ChannelT& getValue(const Coord& ijk) const {return mChannel[BaseT::getValue(ijk)];} - __hostdev__ ChannelT& 
operator()(const Coord& ijk) const {return this->getValue(ijk);} - __hostdev__ ChannelT& operator()(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + __hostdev__ ChannelT& getValue(const Coord& ijk) const { return mChannel[BaseT::getValue(ijk)]; } + __hostdev__ ChannelT& operator()(const Coord& ijk) const { return this->getValue(ijk); } + __hostdev__ ChannelT& operator()(int i, int j, int k) const { return this->getValue(Coord(i, j, k)); } /// @brief return the state and updates the value of the specified voxel - __hostdev__ bool probeValue(const CoordType& ijk, typename remove_const::type &v) const + __hostdev__ bool probeValue(const Coord& ijk, typename remove_const::type& v) const { - uint64_t idx; + uint64_t idx; const bool isActive = BaseT::probeValue(ijk, idx); v = mChannel[idx]; return isActive; @@ -5602,12 +7568,12 @@ class ChannelAccessor : public DefaultReadAccessor /// @brief Return the value from a specified channel that maps to the specified coordinate /// /// @note The template parameter can be either const or non-const - template - __hostdev__ T& getValue(const Coord& ijk, T* channelPtr) const {return channelPtr[BaseT::getValue(ijk)];} + template + __hostdev__ T& getValue(const Coord& ijk, T* channelPtr) const { return channelPtr[BaseT::getValue(ijk)]; } }; // ChannelAccessor - +// the following code block uses std and therefore needs to be ignored by CUDA and HIP #if !defined(__CUDA_ARCH__) && !defined(__HIP__) #if 0 @@ -5645,73 +7611,83 @@ namespace io { /// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid. 
/// /// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :) -template // StreamT class must support: "void write(char*, size_t)" -void writeUncompressedGrid(StreamT &os, const void *buffer) +template // StreamT class must support: "void write(char*, size_t)" +void writeUncompressedGrid(StreamT& os, const void* buffer) { - char header[192] = {0}, *dst = header;// combines io::Header + io::MetaData, see util/IO.h + char header[192] = {0}, *dst = header; // combines io::Header + io::MetaData, see util/IO.h const char *grid = (const char*)buffer, *tree = grid + 672, *root = tree + *(const uint64_t*)(tree + 24); - auto cpy = [&](const char *src, int n){for (auto *end=src+n; src!=end; ++src) *dst++ = *src;}; - if (*(const uint64_t*)(grid)!=0x304244566f6e614eUL) { - fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid magic number\n"); exit(EXIT_FAILURE); - } else if (*(const uint32_t*)(grid+16)>>21!=32) { - fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid major version\n"); exit(EXIT_FAILURE); - } - cpy(grid , 8);// uint64_t Header::magic - cpy(grid + 16, 4);// uint32_t Heder::version - *(uint16_t*)(dst) = 1; dst += 4;// uint16_t Header::gridCount=1 and uint16_t Header::codec=0 - cpy(grid + 32, 8);// uint64_t MetaData::gridSize - cpy(grid + 32, 8);// uint64_t MetaData::fileSize - dst += 8;// uint64_t MetaData::nameKey - cpy(tree + 56, 8);// uint64_t MetaData::voxelCount - cpy(grid + 636, 4);// uint32_t MetaData::gridType - cpy(grid + 632, 4);// uint32_t MetaData::gridClass - cpy(grid + 560, 48);// double[6] MetaData::worldBBox - cpy(root , 24);// int[6] MetaData::indexBBox - cpy(grid + 608, 24);// double[3] MetaData::voxelSize - const char *gridName = grid + 40;// shortGridName - if (*(const uint32_t*)(grid+20) & uint32_t(1)) {// has long grid name - gridName = grid + *(const int64_t*)(grid + 640) + 288*(*(const uint32_t*)(grid + 648) - 1); - gridName += *(const uint64_t*)gridName;// long grid 
name encoded in blind meta data + auto cpy = [&](const char* src, int n) {for (auto *end=src+n; src!=end; ++src) *dst++ = *src; }; + if (*(const uint64_t*)(grid) != 0x304244566f6e614eUL) { + fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid magic number\n"); + exit(EXIT_FAILURE); + } else if (*(const uint32_t*)(grid + 16) >> 21 != 32) { + fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid major version\n"); + exit(EXIT_FAILURE); + } + cpy(grid, 8); // uint64_t Header::magic + cpy(grid + 16, 4); // uint32_t Heder::version + *(uint16_t*)(dst) = 1; + dst += 4; // uint16_t Header::gridCount=1 and uint16_t Header::codec=0 + cpy(grid + 32, 8); // uint64_t MetaData::gridSize + cpy(grid + 32, 8); // uint64_t MetaData::fileSize + dst += 8; // uint64_t MetaData::nameKey + cpy(tree + 56, 8); // uint64_t MetaData::voxelCount + cpy(grid + 636, 4); // uint32_t MetaData::gridType + cpy(grid + 632, 4); // uint32_t MetaData::gridClass + cpy(grid + 560, 48); // double[6] MetaData::worldBBox + cpy(root, 24); // int[6] MetaData::indexBBox + cpy(grid + 608, 24); // double[3] MetaData::voxelSize + const char* gridName = grid + 40; // shortGridName + if (*(const uint32_t*)(grid + 20) & uint32_t(1)) { // has long grid name + gridName = grid + *(const int64_t*)(grid + 640) + 288 * (*(const uint32_t*)(grid + 648) - 1); + gridName += *(const uint64_t*)gridName; // long grid name encoded in blind meta data } uint32_t nameSize = 1; // '\0' - for (const char *p = gridName; *p!='\0'; ++p) ++nameSize; - *(uint32_t*)(dst) = nameSize; dst += 4;// uint32_t MetaData::nameSize - cpy(tree + 32, 12);// uint32_t[3] MetaData::nodeCount - *(uint32_t*)(dst) = 1; dst += 4;// uint32_t MetaData::nodeCount[3]=1 - cpy(tree + 44, 12);// uint32_t[3] MetaData::tileCount - dst += 4;// uint16_t codec and padding - cpy(grid + 16, 4);// uint32_t MetaData::version + for (const char* p = gridName; *p != '\0'; ++p) + ++nameSize; + *(uint32_t*)(dst) = nameSize; + dst += 4; // uint32_t MetaData::nameSize + 
cpy(tree + 32, 12); // uint32_t[3] MetaData::nodeCount + *(uint32_t*)(dst) = 1; + dst += 4; // uint32_t MetaData::nodeCount[3]=1 + cpy(tree + 44, 12); // uint32_t[3] MetaData::tileCount + dst += 4; // uint16_t codec and padding + cpy(grid + 16, 4); // uint32_t MetaData::version assert(dst - header == 192); - os.write(header, 192);// write header - os.write(gridName, nameSize);// write grid name - while(1) {// loop over all grids in the buffer (typically just one grid per buffer) - const uint64_t gridSize = *(const uint64_t*)(grid + 32); - os.write(grid, gridSize);// write grid <- bulk of writing! - if (*(const uint32_t*)(grid+24) >= *(const uint32_t*)(grid+28) - 1) break; - grid += gridSize; + os.write(header, 192); // write header + os.write(gridName, nameSize); // write grid name + while (1) { // loop over all grids in the buffer (typically just one grid per buffer) + const uint64_t gridSize = *(const uint64_t*)(grid + 32); + os.write(grid, gridSize); // write grid <- bulk of writing! + if (*(const uint32_t*)(grid + 24) >= *(const uint32_t*)(grid + 28) - 1) + break; + grid += gridSize; } -}// writeUncompressedGrid +} // writeUncompressedGrid /// @brief write multiple NanoVDB grids to a single file, without compression. 
template class VecT> void writeUncompressedGrids(const char* fileName, const VecT& handles) { -#ifdef NANOVDB_USE_IOSTREAMS// use this to switch between std::ofstream or FILE implementations +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ofstream or FILE implementations std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); #else - struct StreamT { - FILE *fptr; - StreamT(const char *name) {fptr = fopen(name, "wb");} - ~StreamT() {fclose(fptr);} - void write(const char *data, size_t n){fwrite(data, 1, n, fptr);} - bool is_open() const {return fptr != NULL;} + struct StreamT + { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "wb"); } + ~StreamT() { fclose(fptr); } + void write(const char* data, size_t n) { fwrite(data, 1, n, fptr); } + bool is_open() const { return fptr != NULL; } } os(fileName); #endif if (!os.is_open()) { - fprintf(stderr, "nanovdb::writeUncompressedGrids: Unable to open file \"%s\"for output\n",fileName); exit(EXIT_FAILURE); + fprintf(stderr, "nanovdb::writeUncompressedGrids: Unable to open file \"%s\"for output\n", fileName); + exit(EXIT_FAILURE); } - for (auto &handle : handles) writeUncompressedGrid(os, handle.data()); -}// writeUncompressedGrids + for (auto& handle : handles) + writeUncompressedGrid(os, handle.data()); +} // writeUncompressedGrids /// @brief read all uncompressed grids from a stream and return their handles. /// @@ -5719,61 +7695,217 @@ void writeUncompressedGrids(const char* fileName, const VecT& handl /// /// @details StreamT class must support: "bool read(char*, size_t)" and "void skip(uint32_t)" template class VecT> -VecT readUncompressedGrids(StreamT& is, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) -{// header1, metadata11, grid11, metadata12, grid2 ... header2, metadata21, grid21, metadata22, grid22 ... - char header[16], metadata[176]; - VecT handles; - while(is.read(header, 16)) {// read all segments, e.g. 
header1, metadata11, grid11, metadata12, grid2 ... - if (*(uint64_t*)(header)!=0x304244566f6e614eUL) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number\n"); exit(EXIT_FAILURE); - } else if (*(uint32_t*)(header+8)>>21!=32) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version\n"); exit(EXIT_FAILURE); - } else if (*(uint16_t*)(header+14)!=0) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec\n"); exit(EXIT_FAILURE); - } - for (uint16_t i=0, e=*(uint16_t*)(header+12); i readUncompressedGrids(StreamT& is, const typename GridHandleT::BufferType& pool = typename GridHandleT::BufferType()) +{ // header1, metadata11, grid11, metadata12, grid2 ... header2, metadata21, grid21, metadata22, grid22 ... + char header[16], metadata[176]; + VecT handles; + while (is.read(header, 16)) { // read all segments, e.g. header1, metadata11, grid11, metadata12, grid2 ... + if (*(uint64_t*)(header) != 0x304244566f6e614eUL) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number\n"); + exit(EXIT_FAILURE); + } else if (*(uint32_t*)(header + 8) >> 21 != 32) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version\n"); + exit(EXIT_FAILURE); + } else if (*(uint16_t*)(header + 14) != 0) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec\n"); + exit(EXIT_FAILURE); + } + for (uint16_t i = 0, e = *(uint16_t*)(header + 12); i < e; ++i) { // read all grids in segment + if (!is.read(metadata, 176)) { + fprintf(stderr, "nanovdb::readUncompressedGrids: error reading metadata\n"); + exit(EXIT_FAILURE); + } + const uint64_t gridSize = *(uint64_t*)(metadata); + auto buffer = GridHandleT::BufferType::create(gridSize, &pool); + is.skip(*(uint32_t*)(metadata + 136)); // skip grid name + is.read((char*)buffer.data(), gridSize); + handles.emplace_back(std::move(buffer)); + } + } + return handles; +} // readUncompressedGrids /// @brief Read a multiple un-compressed NanoVDB grids from a file and return 
them as a vector. template class VecT> -VecT readUncompressedGrids(const char *fileName, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) +VecT readUncompressedGrids(const char* fileName, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) { -#ifdef NANOVDB_USE_IOSTREAMS// use this to switch between std::ifstream or FILE implementations - struct StreamT : public std::ifstream { - StreamT(const char *name) : std::ifstream(name, std::ios::in | std::ios::binary) {} - void skip(uint32_t off) {this->seekg(off, std::ios_base::cur);} +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ifstream or FILE implementations + struct StreamT : public std::ifstream + { + StreamT(const char* name) + : std::ifstream(name, std::ios::in | std::ios::binary) + { + } + void skip(uint32_t off) { this->seekg(off, std::ios_base::cur); } }; #else - struct StreamT { - FILE *fptr; - StreamT(const char *name) {fptr = fopen(name, "rb");} - ~StreamT() {fclose(fptr);} - bool read(char *data, size_t n){size_t m=fread(data, 1, n, fptr); return n==m;} - void skip(uint32_t off){fseek(fptr, off, SEEK_CUR);} - bool is_open() const {return fptr != NULL;} + struct StreamT + { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "rb"); } + ~StreamT() { fclose(fptr); } + bool read(char* data, size_t n) + { + size_t m = fread(data, 1, n, fptr); + return n == m; + } + void skip(uint32_t off) { fseek(fptr, off, SEEK_CUR); } + bool is_open() const { return fptr != NULL; } }; #endif - StreamT is(fileName); - if (!is.is_open()) { - fprintf(stderr, "nanovdb::readUncompressedGrids: Unable to open file \"%s\"for input\n",fileName); exit(EXIT_FAILURE); - } - return readUncompressedGrids(is, buffer); -}// readUncompressedGrids + StreamT is(fileName); + if (!is.is_open()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: Unable to open file \"%s\"for input\n", fileName); + exit(EXIT_FAILURE); + } + return 
readUncompressedGrids(is, buffer); +} // readUncompressedGrids } // namespace io -#endif// if !defined(__CUDA_ARCH__) && !defined(__HIP__) +#endif // if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. +template +struct GetValue +{ + __hostdev__ static auto get(const NanoRoot& root) { return root.mBackground; } + __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.value; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.getValue(n); } // works with all build types +}; // GetValue + +template +struct SetValue +{ + static_assert(!BuildTraits::is_special, "SetValue does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile& tile, const ValueT& v) { tile.value = v; } + __hostdev__ static auto set(NanoUpper& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLower& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetValue + +template +struct SetVoxel +{ + static_assert(!BuildTraits::is_special, "SetVoxel does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const 
ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile&, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoUpper&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLower&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetVoxel + +/// @brief Implements Tree::isActive(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetState +{ + __hostdev__ static auto get(const NanoRoot&) { return false; } + __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.state > 0; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } +}; // GetState + +/// @brief Implements Tree::getDim(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetDim +{ + __hostdev__ static uint32_t get(const NanoRoot&) { return 0u; } // background + __hostdev__ static uint32_t get(const typename NanoRoot::Tile&) { return 4096u; } + __hostdev__ static uint32_t get(const NanoUpper&, uint32_t) { return 128u; } + __hostdev__ static uint32_t get(const NanoLower&, uint32_t) { return 8u; } + __hostdev__ static uint32_t get(const NanoLeaf&, uint32_t) { return 1u; } +}; // GetDim + +/// @brief Implements Tree::probeLeaf(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetLeaf +{ + __hostdev__ static const NanoLeaf* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLower&, uint32_t) { return 
nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLeaf& leaf, uint32_t) { return &leaf; } +}; // GetLeaf + +/// @brief Implements Tree::probeLeaf(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct ProbeValue +{ + using ValueT = typename BuildToValueMap::Type; + __hostdev__ static bool get(const NanoRoot& root, ValueT& v) + { + v = root.mBackground; + return false; + } + __hostdev__ static bool get(const typename NanoRoot::Tile& tile, ValueT& v) + { + v = tile.value; + return tile.state > 0u; + } + __hostdev__ static bool get(const NanoUpper& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLower& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLeaf& leaf, uint32_t n, ValueT& v) + { + v = leaf.getValue(n); + return leaf.mValueMask.isOn(n); + } +}; // ProbeValue + +/// @brief Implements Tree::getNodeInfo(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetNodeInfo +{ + struct NodeInfo + { + uint32_t level, dim; + typename NanoLeaf::ValueType minimum, maximum; + typename NanoLeaf::FloatType average, stdDevi; + CoordBBox bbox; + }; + __hostdev__ static NodeInfo get(const NanoRoot& root) + { + return NodeInfo{3u, NanoUpper::DIM, root.minimum(), root.maximum(), root.average(), root.stdDeviation(), root.bbox()}; + } + __hostdev__ static NodeInfo get(const typename NanoRoot::Tile& tile) + { + return NodeInfo{3u, NanoUpper::DIM, tile.value, tile.value, tile.value, 0, CoordBBox::createCube(tile.origin(), NanoUpper::DIM)}; + } + __hostdev__ static NodeInfo get(const NanoUpper& node, uint32_t n) + { + return NodeInfo{2u, node.dim(), node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLower& node, uint32_t n) + { + return NodeInfo{1u, node.dim(), 
node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLeaf& leaf, uint32_t n) + { + return NodeInfo{0u, leaf.dim(), leaf.minimum(), leaf.maximum(), leaf.average(), leaf.stdDeviation(), leaf.bbox()}; + } +}; // GetNodeInfo } // namespace nanovdb diff --git a/nanovdb/nanovdb/PNanoVDB.h b/nanovdb/nanovdb/PNanoVDB.h index 950b50ceed..44e5ff1da4 100644 --- a/nanovdb/nanovdb/PNanoVDB.h +++ b/nanovdb/nanovdb/PNanoVDB.h @@ -7,7 +7,7 @@ \author Andrew Reidmeyer - \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port + \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port of NanoVDB.h, which is compatible with most graphics APIs. */ @@ -73,7 +73,9 @@ #if defined(PNANOVDB_BUF_C) #include -#if defined(_WIN32) +#if defined(__CUDACC__) +#define PNANOVDB_BUF_FORCE_INLINE static inline __host__ __device__ __forceinline__ +#elif defined(_WIN32) #define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline #else #define PNANOVDB_BUF_FORCE_INLINE static inline __attribute__((always_inline)) @@ -115,6 +117,32 @@ PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, return data64[wordaddress64]; #endif } +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; +} +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} #elif 
defined(PNANOVDB_ADDRESS_64) PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) { @@ -136,6 +164,32 @@ PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, return data64[wordaddress64]; #endif } +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; + } +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} #endif typedef uint32_t pnanovdb_grid_type_t; #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn @@ -153,6 +207,14 @@ uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value) +{ + // NOP, by default no write in HLSL +} #elif defined(PNANOVDB_ADDRESS_64) #define pnanovdb_buf_t StructuredBuffer uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) @@ -166,6 +228,14 @@ uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) ret = ret + (uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t 
byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + // NOP, by default no write in HLSL +} #endif #define pnanovdb_grid_type_t uint #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn @@ -185,6 +255,14 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in GLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) +{ + // NOP, by default no write in GLSL +} #define pnanovdb_grid_type_t uint #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn #endif @@ -193,7 +271,9 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) // force inline #if defined(PNANOVDB_C) -#if defined(_WIN32) +#if defined(__CUDACC__) +#define PNANOVDB_FORCE_INLINE static inline __host__ __device__ __forceinline__ +#elif defined(_WIN32) #define PNANOVDB_FORCE_INLINE static inline __forceinline #else #define PNANOVDB_FORCE_INLINE static inline __attribute__((always_inline)) @@ -207,7 +287,11 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) // struct typedef, static const, inout #if defined(PNANOVDB_C) #define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#if defined(__CUDACC__) +#define PNANOVDB_STATIC_CONST static const __host__ __device__ +#else #define PNANOVDB_STATIC_CONST static const +#endif #define PNANOVDB_INOUT(X) X* #define PNANOVDB_IN(X) const X* #define PNANOVDB_DEREF(X) (*X) @@ -256,7 +340,9 @@ PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_ PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; } 
PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return (pnanovdb_uint32_t)v; } PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { /* bit-exact copy; memcpy avoids the strict-aliasing violation of a pointer cast */ pnanovdb_uint32_t vi; pnanovdb_memcpy(&vi, &v, sizeof(vi)); return vi; } PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { /* bit-exact copy; memcpy avoids the strict-aliasing violation of a pointer cast */ pnanovdb_uint64_t vi; pnanovdb_memcpy(&vi, &v, sizeof(vi)); return vi; } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } @@ -282,6 +368,7 @@ typedef float3 pnanovdb_vec3_t; pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } float pnanovdb_floor(float v) { return floor(v); } pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } @@ -294,6 +381,7 @@ typedef int2 pnanovdb_int64_t; pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); 
return ret; } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } @@ -306,6 +394,7 @@ typedef int64_t pnanovdb_int64_t; pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } @@ -328,7 +417,9 @@ pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v) pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } @@ -450,6 +541,119 @@ pnanovdb_coord_t 
pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } #endif +// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +{ +#if defined(PNANOVDB_C) +#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) + return __popcnt(value); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) + return __builtin_popcount(value); +#else + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return (value * 0x01010101) >> 24; +#endif +#elif defined(PNANOVDB_HLSL) + return countbits(value); +#elif defined(PNANOVDB_GLSL) + return bitCount(value); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_countbits(pnanovdb_uint64_t value) +{ + return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); +} + +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + low += b; + if (low < b) + { + high += 1u; + } + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + if (low == 0u) + { + high -= 1u; + } + low -= 1u; + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); 
+ pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + return (b >= 32u) ? + (high >> (b - 32)) : + ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; + pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 1u << (bit_idx - 32u) : 0u; + return pnanovdb_uint32_as_uint64(mask_low, mask_high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return pnanovdb_uint32_as_uint64( + pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), + pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) + ); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; +} + +#else +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return a + b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + return a - 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return pnanovdb_uint64_low(a >> b); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + return 1llu << bit_idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return a & b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return a != 0llu; +} +#endif + // ------------------------------------------------ Address Type ----------------------------------------------------------- #if defined(PNANOVDB_ADDRESS_32) @@ -484,6 +688,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_addr ret.byte_offset += pnanovdb_uint64_low(byte_offset); return ret; } 
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; + return ret; +} PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) { return address.byte_offset & mask; @@ -538,6 +748,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_addr ret.byte_offset += byte_offset; return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) { return pnanovdb_uint64_low(address.byte_offset) & mask; @@ -597,6 +813,43 @@ PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, p ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); + ret.y = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); + ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 2) << 3)) & 65535; /* shift is 0 or 16; mask so the upper half-word never leaks into the result */ +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, 
pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 65535.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 255.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + return ret; +} PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_read_bit(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t bit_offset) { @@ -626,13 +879,52 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr } #endif +// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) +{ + pnanovdb_buf_write_uint32(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) +{ + pnanovdb_buf_write_uint64(buf, 
address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) +{ + pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) +{ + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) +{ + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), PNANOVDB_DEREF(value).x); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); +} + // ------------------------------------------------ Core Structures ----------------------------------------------------------- #define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) #define 
PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI -#define PNANOVDB_MINOR_VERSION_NUMBER 4// reflects changes to the API but not ABI -#define PNANOVDB_PATCH_VERSION_NUMBER 2// reflects bug-fixes with no ABI or API changes +#define PNANOVDB_MINOR_VERSION_NUMBER 5// reflects changes to the API but not ABI +#define PNANOVDB_PATCH_VERSION_NUMBER 1// reflects bug-fixes with no ABI or API changes #define PNANOVDB_GRID_TYPE_UNKNOWN 0 #define PNANOVDB_GRID_TYPE_FLOAT 1 @@ -654,10 +946,16 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_GRID_TYPE_VEC4F 17 #define PNANOVDB_GRID_TYPE_VEC4D 18 #define PNANOVDB_GRID_TYPE_INDEX 19 -#define PNANOVDB_GRID_TYPE_END 20 +#define PNANOVDB_GRID_TYPE_ONINDEX 20 +#define PNANOVDB_GRID_TYPE_INDEXMASK 21 +#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 +#define PNANOVDB_GRID_TYPE_POINTINDEX 23 +#define PNANOVDB_GRID_TYPE_VEC3U8 24 +#define PNANOVDB_GRID_TYPE_VEC3U16 25 +#define PNANOVDB_GRID_TYPE_END 26 #define PNANOVDB_GRID_CLASS_UNKNOWN 0 -#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band levelset, e.g. SDF +#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF #define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density #define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity #define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid @@ -665,7 +963,8 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) #define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft #define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. 
into an external array -#define PNANOVDB_GRID_CLASS_END 9 +#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features +#define PNANOVDB_GRID_CLASS_END 10 #define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) #define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) @@ -679,13 +978,22 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_LEAF_TYPE_LITE 1 #define PNANOVDB_LEAF_TYPE_FP 2 #define PNANOVDB_LEAF_TYPE_INDEX 3 - -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3 }; +#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 +#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 + +// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... +// bit count of values in leaf nodes, i.e. 
8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48 }; +// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64 }; +// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48 }; +// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. for Vec3f it is 32 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16 }; +// bit count of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32 }; +// one of the 6 leaf types defined above, e.g. 
PNANOVDB_LEAF_TYPE_INDEX = 3 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0 }; struct pnanovdb_map_t { @@ -738,6 +1046,31 @@ PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovd return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); } +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, 
pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); +} + struct pnanovdb_grid_t { pnanovdb_uint64_t magic; // 8 bytes, 0 @@ -827,6 +1160,54 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(p return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); } +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t 
grid_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) +{ + return (major << 21u) | (minor << 10u) | patch_num; +} + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) { return (version >> 21u) & ((1u << 11u) - 1u); @@ -952,6 +1333,40 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_b return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); } +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, 
pnanovdb_uint32_t node_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); +} + struct pnanovdb_root_t { pnanovdb_coord_t bbox_min; @@ -980,6 +1395,16 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_bu return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_ROOT_OFF_TABLE_SIZE)); } +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, pnanovdb_uint32_t tile_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); +} + struct pnanovdb_root_tile_t { pnanovdb_uint64_t key; @@ -1008,6 +1433,16 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_bu return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); } +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { + pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); +} + struct pnanovdb_upper_t { pnanovdb_coord_t bbox_min; @@ -1049,6 +1484,20 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void 
pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + struct pnanovdb_lower_t { pnanovdb_coord_t bbox_min; @@ -1090,6 +1539,20 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t 
valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + struct pnanovdb_leaf_t { pnanovdb_coord_t bbox_min; @@ -1124,6 +1587,13 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_ return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); +} + struct pnanovdb_grid_type_constants_t { pnanovdb_uint32_t root_off_background; @@ -1157,28 +1627,35 @@ struct pnanovdb_grid_type_constants_t }; PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) +// The following table with offsets will nedd to be updates as new GridTypes are added in NanoVDB.h PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = { - {28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, - {28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 
8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, - {28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, - {32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, - {28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, - {28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, - {28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, - {32, 64, 96, 128, 136, 160, 256, 32, 
24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, - {32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 88, 96}, +{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, +{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, +{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, +{28, 32, 36, 
40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, +{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, +{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, +{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, +{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, }; // 
------------------------------------------------ Basic Lookup ----------------------------------------------------------- @@ -1192,12 +1669,11 @@ PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_grid return meta; } -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanodvb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) { pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); - pnanovdb_address_t address = grid.address; - address = pnanovdb_address_offset64(address, pnanovdb_int64_as_uint64(byte_offset)); + pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); return address; } @@ -1272,6 +1748,8 @@ PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanov return null_handle; } +// ----------------------------- Leaf Node --------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + @@ -1315,6 +1793,8 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovd return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); } +// ----------------------------- Leaf FP Types Specialization --------------------------------------- + PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) { // value_log_bits // 2 3 4 @@ -1354,99 +1834,296 @@ PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pna return 
pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +// ----------------------------- Leaf Index Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) { -#if defined(PNANOVDB_C) -#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) - return __popcnt(value); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) - return __builtin_popcount(value); -#else - value = value - ((value >> 1) & 0x55555555); - value = (value & 0x33333333) + ((value >> 2) & 0x33333333); - value = (value + (value >> 4)) & 0x0F0F0F0F; - return (value * 0x01010101) >> 24; -#endif -#elif defined(PNANOVDB_HLSL) - return countbits(value); -#elif defined(PNANOVDB_GLSL) - return bitCount(value); -#endif + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_count_on_range(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t max_index) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) { - pnanovdb_uint32_t mask_idx_max = max_index >> 5u; - pnanovdb_uint32_t sum = 0u; - pnanovdb_uint32_t mask_val = 0u; - for (pnanovdb_uint32_t mask_idx = 0u; mask_idx < mask_idx_max; mask_idx++) - { - mask_val = pnanovdb_read_uint32( - buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * mask_idx)); - sum += pnanovdb_uint32_countbits(mask_val); - } - mask_val = pnanovdb_read_uint32( - buf, pnanovdb_address_offset(p.address, 
PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * mask_idx_max)); - sum += pnanovdb_uint32_countbits(mask_val & ((1u << (max_index & 31u)) - 1u)); - return sum; + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) { -#if defined(PNANOVDB_ADDRESS_32) - pnanovdb_uint32_t low = pnanovdb_uint64_low(a); - pnanovdb_uint32_t high = pnanovdb_uint64_high(a); - low += b; - if (low < b) + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + return pnanovdb_uint64_offset(offset, n); +} + +// ----------------------------- Leaf IndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_index_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_index_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_index_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t 
pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_index_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_index_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) { - high += 1u; + val_mask = val_mask | (1u << bit_idx); } - return pnanovdb_uint32_as_uint64(low, high); -#else - return a + b; -#endif + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +// ----------------------------- Leaf OnIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, 
pnanovdb_leaf_handle_t leaf) { - return pnanovdb_read_uint64(buf, min_address); + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( + buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); + return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) { - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 1u); + return pnanovdb_uint64_offset( + pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), + pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) { - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 2u); + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = 
pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) { - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 3u); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); + } + return idx; } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) { - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - pnanovdb_uint32_t bbox_dif_and_flags = pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf); - pnanovdb_address_t value_address = pnanovdb_leaf_get_table_address(grid_type, buf, leaf, 0u); - if ((bbox_dif_and_flags & 0x10000000) != 0u) + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) { - if (pnanovdb_leaf_get_value_mask(buf, leaf, n)) - { - n = pnanovdb_leaf_count_on_range(buf, leaf, n); - } - else + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, 
PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + + pnanovdb_uint32_t word_idx = n >> 6u; + pnanovdb_uint32_t bit_idx = n & 63u; + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); + pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); + pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) + { + pnanovdb_uint32_t sum = 0u; + sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); + if (word_idx > 0u) { - value_address = pnanovdb_address_null(); - n = 0; + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); + sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; } + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + value_index = pnanovdb_uint64_offset(offset, sum); } - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, value_address), n); + return value_index; } +// ----------------------------- Leaf OnIndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return 
pnanovdb_leaf_onindex_get_value_count(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_onindex_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_onindex_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, 
pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf PointIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + (i == 0u ? 
0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i); + pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(addr, 3u)); + if ((i & 1) == 0u) + { + raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF); + } + else + { + raw32 = (raw32 & 0x0000FFFF) | (value << 16u); + } + pnanovdb_write_uint32(buf, addr, raw32); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + pnanovdb_uint32_t word_idx = i >> 5; + pnanovdb_uint32_t bit_idx = i & 31; + pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx); + pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr); + val_mask = val_mask | (1u << bit_idx); + pnanovdb_write_uint32(buf, addr, val_mask); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_leaf_pointindex_set_on(buf, leaf, 
i); + pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); +} + +// ------------------------------------------------ Lower Node ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) + @@ -1521,6 +2198,8 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanov return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level)); } +// ------------------------------------------------ Upper Node ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) + @@ -1594,6 +2273,14 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanov return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level)); } +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, bufAddress, child); +} + +// ------------------------------------------------ Root ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) { pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min); @@ -1716,6 +2403,93 @@ PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pna return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t 
address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( + pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_end +) +{ + pnanovdb_uint32_t local_range_begin = 0u; + pnanovdb_uint32_t local_range_end = 0u; + pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u); + if (level == 0) + { + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + // recover leaf address + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) }; + if (n > 0u) + { + local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u)); + } + local_range_end = pnanovdb_read_uint16(buf, value_address); + offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf); + } + PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin); + PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end); + return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range( + pnanovdb_buf_t buf, + pnanovdb_grid_type_t 
value_type, + pnanovdb_address_t value_address, + pnanovdb_address_t blindmetadata_value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_address_t)address_begin, + PNANOVDB_INOUT(pnanovdb_address_t)address_end +) +{ + pnanovdb_uint64_t range_begin; + pnanovdb_uint64_t range_end; + pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end)); + + pnanovdb_address_t base_address = blindmetadata_value_address; + pnanovdb_uint32_t stride = 12u; // vec3f + if (value_type == PNANOVDB_GRID_TYPE_VEC3U8) + { + stride = 3u; + } + else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16) + { + stride = 6u; + } + PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride); + PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride); + return range_size; +} + // ------------------------------------------------ ReadAccessor ----------------------------------------------------------- struct pnanovdb_readaccessor_t @@ -1806,6 +2580,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_ca return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level)); } +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, table_address, child); +} + PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, 
PNANOVDB_INOUT(pnanovdb_uint32_t) level) { pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); diff --git a/nanovdb/nanovdb/Readme.md b/nanovdb/nanovdb/Readme.md index 15fda0a2eb..f5d0fb5201 100644 --- a/nanovdb/nanovdb/Readme.md +++ b/nanovdb/nanovdb/Readme.md @@ -3,10 +3,11 @@ # NanoVDB: A lightweight GPU friendly version of VDB initially targeting rendering applications. -* [Build instructions for make and cmake](../../doc/nanovdb/HowToBuild.md) -* [Frequently asked questions](../../doc/nanovdb/FAQ.md) -* [Source tree](../../doc/nanovdb/SourceTree.md) -* [Examples](../../doc/nanovdb/HelloWorld.md) +* [Build instructions for make and cmake](docs/HowToBuild.md) +* [Frequently asked questions](docs/FAQ.md) +* [Grid cells vs grid nodes](docs/GridCells_vs_GridNodes/Main.pdf) +* [Source tree](docs/SourceTree.md) +* [Examples](docs/HelloWorld.md) ### Copyright Contributors to the OpenVDB Project ### SPDX-License-Identifier: MPL-2.0 diff --git a/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc b/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc index f3aa7ce8ea..7a3a5b5170 100644 --- a/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc +++ b/nanovdb/nanovdb/cmd/convert/nanovdb_convert.cc @@ -16,7 +16,7 @@ #include #include // this is required to read (and write) NanoVDB files on the host -#include +#include #include void usage [[noreturn]] (const std::string& progName, int exitStatus = EXIT_FAILURE) @@ -201,36 +201,26 @@ int main(int argc, char* argv[]) auto openToNano = [&](const openvdb::GridBase::Ptr& base) { - if (auto floatGrid = openvdb::GridBase::grid(base)) { + using SrcGridT = openvdb::FloatGrid; + if (auto floatGrid = openvdb::GridBase::grid(base)) { + nanovdb::CreateNanoGrid s(*floatGrid); + s.setStats(sMode); + s.setChecksum(cMode); + s.enableDithering(dither); + s.setVerbose(verbose ? 1 : 0); switch (qMode) { - case nanovdb::GridType::Fp4: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, verbose ? 
1 : 0); - } - case nanovdb::GridType::Fp8: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, verbose ? 1 : 0); - } - case nanovdb::GridType::Fp16: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, verbose ? 1 : 0); - } - case nanovdb::GridType::FpN: { + case nanovdb::GridType::Fp4: + return s.getHandle(); + case nanovdb::GridType::Fp8: + return s.getHandle(); + case nanovdb::GridType::Fp16: + return s.getHandle(); + case nanovdb::GridType::FpN: if (absolute) { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - s.oracle() = nanovdb::AbsDiff(tolerance); - return s(*floatGrid, sMode, cMode, verbose ? 1 : 0); + return s.getHandle(nanovdb::AbsDiff(tolerance)); } else { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - s.oracle() = nanovdb::RelDiff(tolerance); - return s(*floatGrid, sMode, cMode, verbose ? 1 : 0); + return s.getHandle(nanovdb::RelDiff(tolerance)); } - } default: break; }// end of switch @@ -251,13 +241,15 @@ int main(int argc, char* argv[]) file.open(false); //disable delayed loading if (gridName.empty()) {// convert all grid in the file auto grids = file.getGrids(); + std::vector > handles; for (auto& grid : *grids) { if (verbose) { std::cout << "Converting OpenVDB grid named \"" << grid->getName() << "\" to NanoVDB" << std::endl; } - auto handle = openToNano(grid); - nanovdb::io::writeGrid(os, handle, codec); + handles.push_back(openToNano(grid)); } // loop over OpenVDB grids in file + auto handle = nanovdb::mergeGrids(handles); + nanovdb::io::writeGrid(os, handle, codec); } else {// convert only grid with matching name auto grid = file.readGrid(gridName); if (verbose) { @@ -280,9 +272,11 @@ int main(int argc, char* argv[]) if (gridName.empty()) { auto handles = nanovdb::io::readGrids(inputFile, verbose); for (auto &h : handles) { - if (verbose) - std::cout << "Converting NanoVDB grid named \"" << h.gridMetaData()->shortGridName() << "\" to 
OpenVDB" << std::endl; - grids->push_back(nanoToOpenVDB(h)); + for (uint32_t i = 0; i < h.gridCount(); ++i) { + if (verbose) + std::cout << "Converting NanoVDB grid named \"" << h.gridMetaData(i)->shortGridName() << "\" to OpenVDB" << std::endl; + grids->push_back(nanoToOpenVDB(h, 0, i)); + } } } else { auto handle = nanovdb::io::readGrid(inputFile, gridName); diff --git a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc index 5f24f52c4e..ff16ada7e7 100644 --- a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc +++ b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc @@ -103,7 +103,7 @@ int main(int argc, char* argv[]) if (size > n) n = size; }; - auto vec3RToStr = [](const nanovdb::Vec3R& v) { + auto Vec3dToStr = [](const nanovdb::Vec3d& v) { std::stringstream ss; ss << std::setprecision(3); ss << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; @@ -186,7 +186,7 @@ int main(int argc, char* argv[]) width(configWidth, nodesToStr(m.nodeCount)); width(tileWidth, nodesToStr(m.tileCount)); width(voxelsWidth, std::to_string(m.voxelCount)); - width(voxelSizeWidth, vec3RToStr(m.voxelSize)); + width(voxelSizeWidth, Vec3dToStr(m.voxelSize)); } std::cout << "\nThe file \"" << file << "\" contains the following "; if (list.size()>1) { @@ -227,7 +227,7 @@ int main(int argc, char* argv[]) << std::left << std::setw(codecWidth) << nanovdb::io::toStr(m.codec) << std::left << std::setw(sizeWidth) << format(m.gridSize) << std::left << std::setw(fileWidth) << format(m.fileSize) - << std::left << std::setw(voxelSizeWidth) << vec3RToStr(m.voxelSize); + << std::left << std::setw(voxelSizeWidth) << Vec3dToStr(m.voxelSize); } std::cout << std::left << std::setw(voxelsWidth) << m.voxelCount << std::left << std::setw(resWidth) << resToStr(m.indexBBox); diff --git a/nanovdb/nanovdb/examples/CMakeLists.txt b/nanovdb/nanovdb/examples/CMakeLists.txt index dc0d243495..df4fabb059 100644 --- a/nanovdb/nanovdb/examples/CMakeLists.txt +++ 
b/nanovdb/nanovdb/examples/CMakeLists.txt @@ -88,6 +88,7 @@ if(NANOVDB_BUILD_BENCHMARK) endif() nanovdb_example(NAME "ex_make_custom_nanovdb") +nanovdb_example(NAME "ex_make_custom_nanovdb_cuda") nanovdb_example(NAME "ex_make_funny_nanovdb") nanovdb_example(NAME "ex_make_typed_grids") nanovdb_example(NAME "ex_make_nanovdb_sphere") @@ -100,7 +101,8 @@ nanovdb_example(NAME "ex_read_nanovdb_sphere_accessor") nanovdb_example(NAME "ex_read_nanovdb_sphere_accessor_cuda") nanovdb_example(NAME "ex_index_grid_cuda") nanovdb_example(NAME "ex_nodemanager_cuda") -nanovdb_example(NAME "ex_modify_nanovdb_thrust") +nanovdb_example(NAME "ex_voxels_to_grid_cuda") +#nanovdb_example(NAME "ex_modify_nanovdb_thrust") nanovdb_example(NAME "ex_map_pool_buffer") nanovdb_example(NAME "ex_bump_pool_buffer") nanovdb_example(NAME "ex_collide_level_set") diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu index 0c8175dfb3..9e7f0892d3 100644 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu +++ b/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu @@ -9,7 +9,8 @@ /// @brief CUDA kernel for a simple ray-tracing benchmark test. #include "DenseGrid.h" -#include // for CUDA memory management +#include // for CUDA memory management +#include #include // for nanovdb::Ray #include // for nanovdb::DDA @@ -78,31 +79,16 @@ extern "C" float launch_kernels(const nanovdb::DenseGridHandle(); // note this cannot be de-referenced since it points to a memory address on the GPU! auto* deviceImage = imgHandle.deviceImage(); // note this cannot be de-referenced since it points to a memory address on the GPU! 
assert(deviceGrid && deviceImage); - + float elapsedTime = 0.0f; #ifdef CUDA_TIMING - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, stream); + nanovdb::GpuTimer timer; + timer.start(); #endif - // kernal syntax: <<>> render_kernel<<>>(*deviceGrid, *camera, *deviceImage); - - float elapsedTime = 0.0f; -#ifdef CUDA_TIMING - cudaEventRecord(stop, stream); - cudaEventSynchronize(stop); - - cudaEventElapsedTime(&elapsedTime, start, stop); - //printf("DenseGrid: GPU kernel with %i rays ... completed in %5.3f milliseconds\n", imgHandle.image()->size(), elapsedTime); - cudaError_t errCode = cudaGetLastError(); - if (errCode != cudaSuccess) { - fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(errCode), __FILE__, __LINE__); - exit(errCode); - } - cudaEventDestroy(start); - cudaEventDestroy(stop); + #ifdef CUDA_TIMING + elapsedTime = timer.elapsed(); #endif + cudaCheckError(); return elapsedTime; -} \ No newline at end of file +} diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu index 13d2f0088f..61675707fc 100644 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu +++ b/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu @@ -9,9 +9,10 @@ /// @brief CUDA kernel for a simple ray-tracing benchmark test. 
#include // for nanovdb::GridHandle -#include // for CUDA memory management +#include // for CUDA memory management #include // for nanovdb::Ray #include // for nanovdb::ZeroCrossing +#include #include "Image.h" #include "Camera.h" @@ -33,8 +34,7 @@ __global__ void render_kernel(const nanovdb::NanoGrid& grid, const int w = blockIdx.x * blockDim.x + threadIdx.x; const int h = blockIdx.y * blockDim.y + threadIdx.y; - if (w >= img.width() || h >= img.height()) - return; + if (w >= img.width() || h >= img.height()) return; const auto& tree = grid.tree(); const auto& bbox = tree.bbox(); @@ -45,6 +45,7 @@ __global__ void render_kernel(const nanovdb::NanoGrid& grid, CoordT ijk; float t; float v0; + if (nanovdb::ZeroCrossing(ray, acc, ijk, v0, t)) { #if 1// second-order central difference Vec3T grad(acc.getValue(ijk.offsetBy(1,0,0)) - acc.getValue(ijk.offsetBy(-1,0,0)), @@ -75,38 +76,24 @@ extern "C" float launch_kernels(const nanovdb::GridHandle* camera, cudaStream_t stream) { - using BuildT = nanovdb::FpN; + using BuildT = float;// nanovdb::FpN; const auto* img = imgHandle.image(); // host image! auto round = [](int a, int b) { return (a + b - 1) / b; }; const dim3 threadsPerBlock(8, 8), numBlocks(round(img->width(), threadsPerBlock.x), round(img->height(), threadsPerBlock.y)); auto* deviceGrid = gridHandle.deviceGrid(); // note this cannot be de-referenced since it points to a memory address on the GPU! auto* deviceImage = imgHandle.deviceImage(); // note this cannot be de-referenced since it points to a memory address on the GPU! 
- assert(deviceGrid && deviceImage); - + if (!deviceGrid) throw std::runtime_error(std::string("\nError in launch_kernels: No device grid of type: ") + nanovdb::toStr(nanovdb::mapToGridType())); + if (!deviceImage) throw std::runtime_error("\nError in launch_kernels: No device image!"); + float elapsedTime = 0.0f; #ifdef CUDA_TIMING - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, stream); + nanovdb::GpuTimer timer; + timer.start(); #endif - // kernal syntax: <<>> render_kernel<<>>(*deviceGrid, *camera, *deviceImage); - - float elapsedTime = 0.0f; #ifdef CUDA_TIMING - cudaEventRecord(stop, stream); - cudaEventSynchronize(stop); - - cudaEventElapsedTime(&elapsedTime, start, stop); - //printf("NanoVDB: GPU kernel with %i rays ... completed in %5.3f milliseconds\n", imgHandle.image()->size(), elapsedTime); - cudaError_t errCode = cudaGetLastError(); - if (errCode != cudaSuccess) { - fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(errCode), __FILE__, __LINE__); - exit(errCode); - } - cudaEventDestroy(start); - cudaEventDestroy(stop); + elapsedTime = timer.elapsed(); #endif + cudaCheckError(); return elapsedTime; -} \ No newline at end of file +} diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark.cc b/nanovdb/nanovdb/examples/benchmark/Benchmark.cc index ba3b46dcdb..65436ee886 100644 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark.cc +++ b/nanovdb/nanovdb/examples/benchmark/Benchmark.cc @@ -14,17 +14,15 @@ #include #include "Image.h" #include "Camera.h" -#include "../ex_util/CpuTimer.h" +#include #include "DenseGrid.h" #if defined(NANOVDB_USE_CUDA) -#include +#include #endif #if defined(NANOVDB_USE_OPENVDB) -#include - #include #include #include @@ -41,33 +39,6 @@ #include -namespace nanovdb { - -inline std::ostream& -operator<<(std::ostream& os, const CoordBBox& b) -{ - os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") ->" - << "(" << b[1][0] << "," << b[1][1] << "," << 
b[1][2] << ")"; - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, const Coord& ijk) -{ - os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; - return os; -} - -template -inline std::ostream& -operator<<(std::ostream& os, const Vec3& v) -{ - os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; - return os; -} - -} - // define the environment variable VDB_DATA_PATH to use models from the web // e.g. setenv VDB_DATA_PATH /home/kmu/dev/data/vdb // or export VDB_DATA_PATH=/Users/ken/dev/data/vdb @@ -144,7 +115,7 @@ class Benchmark : public ::testing::Test return grid; } #endif - nanovdb::CpuTimer<> mTimer; + nanovdb::CpuTimer mTimer; }; // Benchmark TEST_F(Benchmark, Ray) @@ -358,7 +329,7 @@ TEST_F(Benchmark, DenseGrid) grid->setValue(pos, 1.0f); EXPECT_EQ( 0.0f, grid->getValue(min) ); EXPECT_EQ( 1.0f, grid->getValue(pos) ); - EXPECT_EQ( 0.0f, grid->getValue(max) ); + EXPECT_EQ( 0.0f, grid->getValue(max)); for (auto it = bbox.begin(); it; ++it) { auto &ijk = *it; EXPECT_TRUE(grid->test(ijk)); @@ -426,7 +397,7 @@ TEST_F(Benchmark, OpenVDB_CPU) auto srcGrid = this->getSrcGrid(); mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Disable, /*verbose=*/0); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Disable); mTimer.restart("Writing NanoVDB grid"); #if defined(NANOVDB_USE_BLOSC) nanovdb::io::writeGrid("data/test.nvdb", handle, nanovdb::io::Codec::BLOSC); @@ -537,7 +508,7 @@ TEST_F(Benchmark, DenseGrid_CPU) const auto bbox = grid->worldBBox(); const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * openvdb::math::pi() / 180.0f; + const RealT theta = angle * RealT(3.14159265358979323846) / 180.0f; return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); }; @@ -642,7 +613,7 @@ TEST_F(Benchmark, NanoVDB_GPU) #if 
defined(NANOVDB_USE_OPENVDB) auto handle = nanovdb::io::readGrid("data/test.nvdb"); #else - auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f); + auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f); #endif //auto handle = nanovdb::io::readGrid("data/test.nvdb"); const auto* grid = handle.grid(); @@ -650,6 +621,7 @@ TEST_F(Benchmark, NanoVDB_GPU) EXPECT_TRUE(grid->isLevelSet()); EXPECT_FALSE(grid->isFogVolume()); handle.deviceUpload(stream, false); + EXPECT_TRUE(handle.deviceGrid()); std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\"" << std::endl; @@ -658,7 +630,7 @@ TEST_F(Benchmark, NanoVDB_GPU) const auto bbox = grid->worldBBox(); const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * openvdb::math::pi() / 180.0f; + const RealT theta = angle * nanovdb::pi() / 180.0f; return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); }; CameraT *host_camera, *dev_camera; @@ -675,7 +647,7 @@ TEST_F(Benchmark, NanoVDB_GPU) host_camera->update(eye(angle), lookat, up, vfov, aspect); cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); mTimer.start(ss.str()); - launch_kernels(handle, imgHandle, dev_camera, stream); + launch_kernels(handle, imgHandle, dev_camera, stream);// defined in BenchKernels_nano.cu mTimer.stop(); //mTimer.start("Write image to file"); @@ -689,7 +661,7 @@ TEST_F(Benchmark, NanoVDB_GPU) } //frame number angle cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFree(host_camera)); + cudaCheck(cudaFreeHost(host_camera)); cudaCheck(cudaFree(dev_camera)); } // NanoVDB_GPU #endif diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc b/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc index 041ca8bada..d7c0d44e2b 100644 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc +++ b/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc @@ -1,19 +1,23 @@ // Copyright 
Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file Benchmark_nano.cc +/// @file Benchmark_dense.cc /// /// @author Ken Museth /// /// @brief A super lightweight and portable ray-tracing benchmark /// that only depends on NanoVDB (not OpenVDB) and CUDA. +#ifdef _WIN32 +#define _USE_MATH_DEFINES +#endif + #include -#include +#include #include "Image.h" #include "Camera.h" #include "DenseGrid.h" -#include "../ex_util/CpuTimer.h" +#include #include // for std::setfill and std::setw @@ -28,7 +32,7 @@ int main(int argc, char** argv) using RealT = float; using Vec3T = nanovdb::Vec3; using CameraT = nanovdb::Camera; - nanovdb::CpuTimer<> timer; + nanovdb::CpuTimer timer; if (argc!=2) { std::cerr << "Usage: " << argv[0] << " path/level_set.vol" << std::endl; @@ -57,7 +61,7 @@ int main(int argc, char** argv) const auto* grid = handle.grid(); if (!grid || !grid->isLevelSet()) { std::cerr << "Error loading NanoVDB level set from file" << std::endl; - return 1; + exit (EXIT_FAILURE); } handle.deviceUpload(stream, false); std::cout << "\nRay-tracing DenseGrid of size " @@ -96,7 +100,7 @@ int main(int argc, char** argv) } //frame number angle cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFree(host_camera)); + cudaCheck(cudaFreeHost(host_camera)); cudaCheck(cudaFree(dev_camera)); printf("\nRay-traced %i different frames, each with %i rays, in %5.3f ms.\nThis corresponds to an average of %5.3f ms per frame or %5.3f FPS!\n", diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc b/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc index aed38ddd07..3c17538dbf 100644 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc +++ b/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc @@ -1,18 +1,22 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file Benchmark_nano.cc +/// @file Benchmark_nano.cpp /// /// @author Ken Museth /// /// @brief A super lightweight and portable ray-tracing 
benchmark /// that only depends on NanoVDB (not OpenVDB) and CUDA. +#ifdef _WIN32 +#define _USE_MATH_DEFINES +#endif + #include -#include +#include #include "Image.h" #include "Camera.h" -#include "../ex_util/CpuTimer.h" +#include #include // for std::setfill and std::setw @@ -25,10 +29,10 @@ int main(int argc, char** argv) { using BufferT = nanovdb::CudaDeviceBuffer; using ValueT = float; - using BuildT = nanovdb::FpN; + using BuildT = float;//nanovdb::FpN; using Vec3T = nanovdb::Vec3; using CameraT = nanovdb::Camera; - nanovdb::CpuTimer<> timer; + nanovdb::CpuTimer timer; if (argc!=2) { std::cerr << "Usage: " << argv[0] << " path/level_set.nvdb" << std::endl; @@ -57,12 +61,17 @@ int main(int argc, char** argv) cudaStream_t stream; cudaCheck(cudaStreamCreate(&stream)); - auto handle = nanovdb::io::readGrid(argv[1]); - - const auto* grid = handle.grid(); - if (!grid || !grid->isLevelSet()) { - std::cerr << "Error loading NanoVDB level set from file" << std::endl; - return 1; + const int gridID = 0, verbose = 1; + auto handle = nanovdb::io::readGrid(argv[1], gridID, verbose); + + const auto* grid = handle.grid(gridID); + if (!grid) { + std::cerr << "Error loading \"" << nanovdb::toStr(nanovdb::mapToGridType()) << "\" grid from file " << argv[1] << std::endl; + exit (EXIT_FAILURE); + }// grid is guaranteed to be non-null below + if (!grid->isLevelSet()) { + std::cerr << "Grid is not a level set\n"; + exit (EXIT_FAILURE); } handle.deviceUpload(stream, false); std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\" of size " @@ -103,7 +112,7 @@ int main(int argc, char** argv) } //frame number angle cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFree(host_camera)); + cudaCheck(cudaFreeHost(host_camera)); cudaCheck(cudaFree(dev_camera)); printf("\nRay-traced %i different frames, each with %i rays, in %5.3f ms.\nThis corresponds to an average of %5.3f ms per frame or %5.3f FPS!\n", diff --git a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h 
b/nanovdb/nanovdb/examples/benchmark/DenseGrid.h index 1fd00630af..9666f8a260 100644 --- a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h +++ b/nanovdb/nanovdb/examples/benchmark/DenseGrid.h @@ -33,14 +33,15 @@ class DenseGridHandle; struct DenseData { + uint64_t mMagic;// magic number + uint64_t mSize; Map mMap;// defined in NanoVDB.h CoordBBox mIndexBBox;// min/max of bbox - BBox mWorldBBox;// 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) - Vec3R mVoxelSize; + BBox mWorldBBox;// 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3d mVoxelSize; GridClass mGridClass;// defined in NanoVDB.h GridType mGridType; // defined in NanoVDB.h uint64_t mY, mX;//strides in the y and x direction - uint64_t mSize; __hostdev__ Coord dim() const { return mIndexBBox.dim(); } @@ -106,7 +107,7 @@ class DenseGrid : private DenseData __hostdev__ const Map& map() const { return DenseData::mMap; } // @brief Return a const reference to the size of a voxel in world units - __hostdev__ const Vec3R& voxelSize() const { return DenseData::mVoxelSize; } + __hostdev__ const Vec3d& voxelSize() const { return DenseData::mVoxelSize; } /// @brief world to index space transformation template @@ -155,7 +156,7 @@ class DenseGrid : private DenseData __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DenseData::applyIJTF(grad); } /// @brief Computes a AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return DenseData::mWorldBBox; } + __hostdev__ const BBox& worldBBox() const { return DenseData::mWorldBBox; } __hostdev__ bool isLevelSet() const { return DenseData::mGridClass == GridClass::LevelSet; } __hostdev__ bool isFogVolume() const { return DenseData::mGridClass == GridClass::FogVolume; } @@ -189,9 +190,7 @@ DenseGrid::create(Coord min, GridClass gridClass, const BufferT& allocator) { - if (dx <= 0) { - throw std::runtime_error("GridBuilder: voxel size is zero or negative"); - } + if (dx <= 0) 
throw std::runtime_error("GridBuilder: voxel size is zero or negative"); max += Coord(1,1,1);// now max is exclusive #if LOG2_TILE_SIZE > 0 @@ -206,8 +205,10 @@ DenseGrid::create(Coord min, const uint64_t size = sizeof(DenseGrid) + sizeof(ValueT)*dim[0]*dim[1]*dim[2]; #endif - DenseGridHandle handle(allocator.create(size)); - DenseGrid* grid = reinterpret_cast(handle.data()); + auto buffer = allocator.create(size); + DenseGrid* grid = reinterpret_cast(buffer.data()); + std::memset(grid, 0, size);// initialize all dense grid values to zero + grid->mMagic = DENSE_MAGIC_NUMBER; grid->mSize = size; const double Tx = p0[0], Ty = p0[1], Tz = p0[2]; const double mat[4][4] = { @@ -245,7 +246,7 @@ DenseGrid::create(Coord min, grid->mGridType = mapToGridType(); grid->mY = dim[2]; grid->mX = dim[2] * dim[1]; - return handle; + return DenseGridHandle(std::move(buffer)); } template @@ -299,24 +300,16 @@ template void writeDense(const DenseGrid &grid, const char* fileName) { std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) { - throw std::runtime_error("Unable to open file for output"); - } - const uint64_t tmp[2] = {DENSE_MAGIC_NUMBER, grid.memUsage()}; - os.write(reinterpret_cast(tmp), 2*sizeof(uint64_t)); - os.write(reinterpret_cast(&grid), tmp[1]); + if (!os.is_open()) throw std::runtime_error("Unable to open file for output"); + os.write(reinterpret_cast(&grid), grid.memUsage()); } template void writeDense(const DenseGridHandle &handle, const char* fileName) { std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) { - throw std::runtime_error("Unable to open file for output"); - } - const uint64_t tmp[2] = {DENSE_MAGIC_NUMBER, handle.size()}; - os.write(reinterpret_cast(tmp), 2*sizeof(uint64_t)); - os.write(reinterpret_cast(handle.data()), tmp[1]); + if (!os.is_open()) throw std::runtime_error("Unable to open file for output"); + os.write(reinterpret_cast(handle.data()), 
handle.size()); } template @@ -329,12 +322,11 @@ readDense(const char* fileName, const BufferT& allocator = BufferT()) } uint64_t tmp[2]; is.read(reinterpret_cast(tmp), 2*sizeof(uint64_t)); - if (tmp[0] != DENSE_MAGIC_NUMBER) { - throw std::runtime_error("This is not a dense NanoVDB file!"); - } - DenseGridHandle handle(allocator.create(tmp[1])); - is.read(reinterpret_cast(handle.data()), tmp[1]); - return handle; + if (tmp[0] != DENSE_MAGIC_NUMBER) throw std::runtime_error("This is not a dense NanoVDB file!"); + auto buffer = allocator.create(tmp[1]); + is.seekg(0);// rewind + is.read(reinterpret_cast(buffer.data()), tmp[1]); + return DenseGridHandle(std::move(buffer)); } }// namespace io ///////////////////////////////////////////// @@ -359,11 +351,14 @@ DenseGridHandle convertToDense(const GridT &grid, const BufferT& alloca const uint64_t size = sizeof(DenseT) + sizeof(ValueT)*dim[0]*dim[1]*dim[2]; #endif - DenseGridHandle handle( allocator.create(size) ); - auto *dense = reinterpret_cast(handle.data()); + auto buffer = allocator.create(size); + auto *dense = reinterpret_cast(buffer.data()); auto *data = dense->data(); + std::memset(data, 0, size);// zero buffer since we're only setting sparse values below // copy DenseData + data->mMagic = DENSE_MAGIC_NUMBER; + data->mSize = size; data->mMap = grid.map(); data->mIndexBBox = grid.indexBBox(); data->mWorldBBox = grid.worldBBox(); @@ -372,7 +367,6 @@ DenseGridHandle convertToDense(const GridT &grid, const BufferT& alloca data->mGridType = grid.gridType(); data->mY = dim[2]; data->mX = dim[2] * dim[1]; - data->mSize = size; // copy values auto kernel = [&](const Range<1,int> &r) { @@ -393,7 +387,7 @@ DenseGridHandle convertToDense(const GridT &grid, const BufferT& alloca kernel(range); #endif - return handle; + return DenseGridHandle( std::move(buffer) ); } ///////////////////////////////////////////// @@ -403,7 +397,12 @@ class DenseGridHandle BufferT mBuffer; public: - DenseGridHandle(BufferT&& resources) { 
mBuffer = std::move(resources); } + DenseGridHandle(BufferT&& resources) { + if (*reinterpret_cast(resources.data()) != DENSE_MAGIC_NUMBER) { + throw std::runtime_error("DenseGridHandle was constructed with an invalid buffer"); + } + mBuffer = std::move(resources); + } DenseGridHandle() = default; /// @brief Disallow copy-construction diff --git a/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc b/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc index 33f37281fe..12edb019d5 100644 --- a/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc +++ b/nanovdb/nanovdb/examples/ex_bump_pool_buffer/bump_pool_buffer.cc @@ -110,8 +110,8 @@ int main() std::vector> gridHdls; // create two grids... - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, bufferContext)); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, bufferContext)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); // Get a (raw) pointer to the NanoVDB grid form the GridManager. 
auto* dstGrid = gridHdls[0].grid(); diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/common.h b/nanovdb/nanovdb/examples/ex_collide_level_set/common.h index ad3ce160ae..dc54e8f5f1 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/common.h +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/common.h @@ -3,6 +3,7 @@ #pragma once +#define _USE_MATH_DEFINES #include #include #include diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc b/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc index 637b5d98fb..876c08e16a 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/main.cc @@ -5,7 +5,7 @@ #include #include #include -#include +#include #if defined(NANOVDB_USE_CUDA) using BufferT = nanovdb::CudaDeviceBuffer; @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "sphere"); + handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isLevelSet() == false) { diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu b/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu index 994637c27c..71a976eca4 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/nanovdb.cu @@ -1,12 +1,12 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 +#define _USE_MATH_DEFINES #include #include #include -#include -#include +#include #include #include @@ -44,8 +44,7 @@ void runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int num using namespace nanovdb; auto* h_grid = handle.grid(); - if (!h_grid) - throw std::runtime_error("GridHandle does not contain 
a FloatGrid"); + if (!h_grid) throw std::runtime_error("GridHandle does not contain a FloatGrid"); Vec3f* h_positions = reinterpret_cast(positionBuffer.data()); computeFill(false, h_positions, 0, sizeof(Vec3f) * numPoints); diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc index 0c07a02035..aed2bc3f0a 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc @@ -3,13 +3,14 @@ #if defined(NANOVDB_USE_OPENVDB) +#define _USE_MATH_DEFINES #include #include #include #include -#include +#include #include #include "common.h" @@ -36,4 +37,4 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int num // Not yet implemented... } -#endif +#endif \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc index 45cfdff394..42d93c725b 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc @@ -1,35 +1,37 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include // nanovdb::IndexGridBuilder +#include #include // for nanovdb::createLevelSetSphere -#include // for nanovdb::CudaDeviceBuffer +#include // for nanovdb::CudaDeviceBuffer -extern "C" void launch_kernels(const nanovdb::NanoGrid*,// device grid - const nanovdb::NanoGrid*,// host grid +extern "C" void launch_kernels(const nanovdb::NanoGrid*,// device grid + const nanovdb::NanoGrid*,// host grid cudaStream_t stream); /// @brief This examples depends on NanoVDB and CUDA. int main() { + using SrcGridT = nanovdb::FloatGrid; + using DstBuildT = nanovdb::ValueOnIndex; + using BufferT = nanovdb::CudaDeviceBuffer; try { // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. 
auto srcHandle = nanovdb::createLevelSetSphere(); auto *srcGrid = srcHandle.grid(); // Converts the FloatGrid to an IndexGrid using CUDA for memory management. - nanovdb::IndexGridBuilder builder(*srcGrid, /*only active values*/true); - auto idxHandle = builder.getHandle("IndexGrid_test", /*number of channels*/1u); + auto idxHandle = nanovdb::createNanoGrid(*srcGrid, 1u, false , false);// 1 channel, no tiles or stats cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. cudaStreamCreate(&stream); idxHandle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously - auto* cpuGrid = idxHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU - auto* gpuGrid = idxHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU + auto* cpuGrid = idxHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU + auto* gpuGrid = idxHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU - if (!gpuGrid || !cpuGrid) - throw std::runtime_error("GridHandle did not contain a grid with value type float"); + if (!gpuGrid) throw std::runtime_error("GridHandle did not contain a device grid with value type float"); + if (!cpuGrid) throw std::runtime_error("GridHandle did not contain a host grid with value type float"); launch_kernels(cpuGrid, cpuGrid, stream); // Call a host method to print a grid value on both the CPU and GPU diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu index 6a2770d3a6..be83ceb074 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu @@ -5,9 +5,9 @@ #include // for printf // This is called by the host only -void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) +void cpu_kernel(const 
nanovdb::NanoGrid* cpuGrid) { - nanovdb::ChannelAccessor acc(*cpuGrid); + nanovdb::ChannelAccessor acc(*cpuGrid); //printf("\nNanoVDB CPU: channels=%u values=%lu\n", acc.grid().blindDataCount(), acc.root().maximum()); printf("NanoVDB CPU; %lu\n", acc.idx( 0, 0, 0)); printf("NanoVDB CPU; %lu\n", acc.idx( 99, 0, 0)); @@ -18,9 +18,9 @@ void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) } // This is called by the device only -__global__ void gpu_kernel(const nanovdb::NanoGrid* gpuGrid) +__global__ void gpu_kernel(const nanovdb::NanoGrid* gpuGrid) { - nanovdb::ChannelAccessor acc(*gpuGrid); + nanovdb::ChannelAccessor acc(*gpuGrid); //printf("\nNanoVDB GPU: channels=%u values=%lu\n", gpuGrid->blindDataCount(), acc.root().maximum()); printf("NanoVDB GPU; %lu\n", acc.idx( 0, 0, 0)); printf("NanoVDB GPU; %lu\n", acc.idx( 99, 0, 0)); @@ -31,9 +31,9 @@ __global__ void gpu_kernel(const nanovdb::NanoGrid* gpuGrid } // This is called by the client code on the host -extern "C" void launch_kernels(const nanovdb::NanoGrid* gpuGrid, - const nanovdb::NanoGrid* cpuGrid, - cudaStream_t stream) +extern "C" void launch_kernels(const nanovdb::NanoGrid* gpuGrid, + const nanovdb::NanoGrid* cpuGrid, + cudaStream_t stream) { gpu_kernel<<<1, 1, 0, stream>>>(gpuGrid); // Launch the device kernel asynchronously diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc index d5a37f1b81..aea2812a4b 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb/make_custom_nanovdb.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include +#include #include @@ -11,14 +12,14 @@ int main() { try { - nanovdb::GridBuilder builder(0.0f); - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(0.0f); + auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); - printf("GridBuilder: (%i,%i,%i)=%4.2f\t", 
1, 2, 3, acc.getValue(nanovdb::Coord(1, 2, 3))); - printf("GridBuilder: (%i,%i,%i)=%4.2f\n", 1, 2,-3, acc.getValue(nanovdb::Coord(1, 2,-3))); + printf("build::Grid: (%i,%i,%i)=%4.2f\t", 1, 2, 3, acc.getValue(nanovdb::Coord(1, 2, 3))); + printf("build::Grid: (%i,%i,%i)=%4.2f\n", 1, 2,-3, acc.getValue(nanovdb::Coord(1, 2,-3))); - auto handle = builder.getHandle<>(); + auto handle = nanovdb::createNanoGrid(grid); auto* dstGrid = handle.grid(); // Get a (raw) pointer to the NanoVDB grid form the GridManager. if (!dstGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc new file mode 100644 index 0000000000..767026a167 --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc @@ -0,0 +1,45 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#include +#include +#include + +#include + +extern "C" void launch_kernels(const nanovdb::NanoGrid*,// GPU grid + const nanovdb::NanoGrid*,// CPU grid + cudaStream_t stream); + +/// @brief Creates a NanoVDB grids with custom values and access them. +/// +/// @note This example only depends on NanoVDB. 
+int main() +{ + try { + using GridT = nanovdb::build::Grid; + GridT grid(0.0f);// empty grid with a background value of zero + auto acc = grid.getAccessor(); + acc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); + printf("build::Grid: (%i,%i,%i)=%4.2f\n", 1, 2,-3, acc.getValue(nanovdb::Coord(1, 2,-3))); + printf("build::Grid: (%i,%i,%i)=%4.2f\n", 1, 2, 3, acc.getValue(nanovdb::Coord(1, 2, 3))); + + // convert build::grid to a nanovdb::GridHandle using a Cuda buffer + auto handle = nanovdb::createNanoGrid(grid); + + auto* cpuGrid = handle.grid(); //get a (raw) pointer to a NanoVDB grid of value type float on the CPU + if (!cpuGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); + + cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. + cudaStreamCreate(&stream); + handle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously + auto* gpuGrid = handle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU + + launch_kernels(gpuGrid, cpuGrid, stream); // Call a host method to print a grid values on both the CPU and GPU + cudaStreamDestroy(stream); // Destroy the CUDA stream + } + catch (const std::exception& e) { + std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu new file mode 100644 index 0000000000..335d0b0631 --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu @@ -0,0 +1,35 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // for printf + +// This is called by the host only +void 
cpu_kernel(const nanovdb::NanoGrid* cpuGrid) +{ + auto cpuAcc = cpuGrid->getAccessor(); + for (int k=-3; k<=3; k+=6) { + printf("NanoVDB cpu: (%i,%i,%i)=%4.2f\n", 1, 2, k, cpuAcc.getValue(nanovdb::Coord(1, 2, k))); + } +} + +// This is called by the device only +__global__ void gpu_kernel(const nanovdb::NanoGrid* deviceGrid) +{ + if (threadIdx.x != 0 && threadIdx.x != 6) return; + int k = threadIdx.x - 3; + auto gpuAcc = deviceGrid->getAccessor(); + printf("NanoVDB gpu: (%i,%i,%i)=%4.2f\n", 1, 2, k, gpuAcc.getValue(nanovdb::Coord(1, 2, k))); +} + +// This is called by the client code on the host +extern "C" void launch_kernels(const nanovdb::NanoGrid* deviceGrid, + const nanovdb::NanoGrid* cpuGrid, + cudaStream_t stream) +{ + // Launch the device kernel asynchronously + gpu_kernel<<<1, 64, 0, stream>>>(deviceGrid); + + // Launch the host "kernel" (synchronously) + cpu_kernel(cpuGrid); +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc b/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc index 7909285540..e9b7350bb8 100644 --- a/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_make_funny_nanovdb/make_funny_nanovdb.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include +#include #include #include @@ -11,22 +12,20 @@ /// @note This example only depends on NanoVDB. 
int main() { + using namespace nanovdb; try { const float background = 5.0f; - nanovdb::GridBuilder builder(background, nanovdb::GridClass::LevelSet); - auto acc = builder.getAccessor(); const int size = 500; - auto func = [&](const nanovdb::Coord &ijk){ + auto func = [&](const Coord &ijk){ float v = 40.0f + 50.0f*(cos(ijk[0]*0.1f)*sin(ijk[1]*0.1f) + cos(ijk[1]*0.1f)*sin(ijk[2]*0.1f) + cos(ijk[2]*0.1f)*sin(ijk[0]*0.1f)); - v = nanovdb::Max(v, nanovdb::Vec3f(ijk).length() - size);// CSG intersection with a sphere + v = Max(v, Vec3f(ijk).length() - size);// CSG intersection with a sphere return v > background ? background : v < -background ? -background : v;// clamp value }; - builder(func, nanovdb::CoordBBox(nanovdb::Coord(-size),nanovdb::Coord(size))); - - auto handle = builder.getHandle<>(); - nanovdb::io::writeGrid("data/funny.nvdb", handle, nanovdb::io::Codec::BLOSC); + build::Grid grid(background, "funny", GridClass::LevelSet); + grid(func, CoordBBox(Coord(-size), Coord(size))); + io::writeGrid("data/funny.nvdb", createNanoGrid(grid), io::Codec::BLOSC); } catch (const std::exception& e) { std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; diff --git a/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc b/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc index dae0a9413a..f9d4666784 100644 --- a/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc +++ b/nanovdb/nanovdb/examples/ex_make_typed_grids/make_typed_grids.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include +#include #include // Helper struct to create a default value for the type. 
@@ -36,11 +37,9 @@ void buildGridForType(std::vector>& gridHandles, T const& try { - nanovdb::GridBuilder builder(bgValue); - auto acc = builder.getAccessor(); - + nanovdb::build::Grid grid(bgValue, typeNameStr); + auto acc = grid.getAccessor(); const int radius = 16; - for (int z = -radius; z <= radius; ++z) { for (int y = -radius; y <= radius; ++y) { for (int x = -radius; x <= radius; ++x) { @@ -50,8 +49,7 @@ void buildGridForType(std::vector>& gridHandles, T const& } } } - - gridHandles.push_back(builder.template getHandle<>(1.0, nanovdb::Vec3d(0), typeNameStr)); + gridHandles.push_back(nanovdb::createNanoGrid(grid)); } catch (const std::exception& e) { std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; @@ -83,8 +81,11 @@ int main() */ buildGridForType(gridHandles, float(0), double(0), int16_t(0), int32_t(0), int64_t(0), uint32_t(0), nanovdb::Vec3f(0) /*, nanovdb::Vec3d(0)*/ /*, bool(false)*/ /*, uint16_t(0)*/); - - nanovdb::io::writeGrids("data/custom_types.nvdb", gridHandles); +#if 0 + nanovdb::io::writeGrids("data/custom_types.nvdb", gridHandles); +#else + nanovdb::io::writeUncompressedGrids("data/custom_types.nvdb", gridHandles); +#endif } catch (const std::exception& e) { std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; diff --git a/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc b/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc index 2a5de0844e..526ed9c8cf 100644 --- a/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc +++ b/nanovdb/nanovdb/examples/ex_map_pool_buffer/map_pool_buffer.cc @@ -148,8 +148,8 @@ int main() std::vector> gridHdls; // create two grids... 
- gridHdls.push_back(nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, bufferContext)); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, bufferContext)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, bufferContext)); // share grid[0]'s buffer into a parent-scope handle to prevent deletion. anotherHdl = nanovdb::GridHandle(bufferContext.copy(gridHdls[0].buffer().mId)); diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu index 6e8c45fd71..6b98939768 100644 --- a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu +++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu @@ -8,7 +8,7 @@ #include #include -#include +#include void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale) { @@ -29,7 +29,7 @@ int main() { try { // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. 
- auto handle = nanovdb::createLevelSetSphere(100.0f); + auto handle = nanovdb::createLevelSetSphere(100.0f); using GridT = nanovdb::FloatGrid; handle.deviceUpload(0, false); // Copy the NanoVDB grid to the GPU asynchronously diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc index bf8e250d5b..7d668a48c0 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) -#include +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include #include extern "C" void launch_kernels(const nanovdb::NodeManager*, @@ -13,15 +13,17 @@ extern "C" void launch_kernels(const nanovdb::NodeManager*, /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. int main() { + using SrcGridT = openvdb::FloatGrid; + using BufferT = nanovdb::CudaDeviceBuffer; try { cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. cudaStreamCreate(&stream); // Create an OpenVDB grid of a sphere at the origin with radius 100 and voxel size 1. - auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); + auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Converts the OpenVDB to NanoVDB and returns a GridHandle that uses CUDA for memory management. 
- auto gridHandle = nanovdb::openToNanoVDB(*srcGrid); + auto gridHandle = nanovdb::createNanoGrid(*srcGrid); gridHandle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously auto* grid = gridHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU auto* deviceGrid = gridHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU @@ -29,7 +31,7 @@ int main() throw std::runtime_error("GridHandle did not contain a grid with value type float"); } - auto nodeHandle = nanovdb::createNodeManager(*grid); + auto nodeHandle = nanovdb::createNodeManager(*grid); nodeHandle.deviceUpload(deviceGrid, stream, false); auto *nodeMgr = nodeHandle.template mgr(); auto *deviceNodeMgr = nodeHandle.template deviceMgr(); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc index 433e8da590..870114db39 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb/openvdb_to_nanovdb.cc @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) #include /// @brief Convert an openvdb level set sphere into a nanovdb, access a single value in both grids, and save NanoVDB to file. @@ -13,9 +13,7 @@ int main() try { // Create an OpenVDB grid of a sphere at the origin with radius 100 and voxel size 1. auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); - - auto handle = nanovdb::openToNanoVDB(*srcGrid); // Convert from OpenVDB to NanoVDB and return a shared pointer to a GridHandle. 
- + auto handle = nanovdb::createNanoGrid(*srcGrid); // Convert from OpenVDB to NanoVDB and return a shared pointer to a GridHandle. auto* dstGrid = handle.grid(); // Get a (raw) pointer to the NanoVDB grid form the GridManager. if (!dstGrid) throw std::runtime_error("GridHandle does not contain a grid with value type float"); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc index 773c639b63..4851732882 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_accessor/openvdb_to_nanovdb_accessor.cc @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) #include // Convert an openvdb level set sphere into a nanovdb, use accessors to print out multiple values from both @@ -15,7 +15,7 @@ int main() auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Convert the OpenVDB grid, srcGrid, into a NanoVDB grid handle. - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); // Define a (raw) pointer to the NanoVDB grid on the host. Note we match the value type of the srcGrid! 
auto* dstGrid = handle.grid(); diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc index ea1172b1a4..6cb9f5b4d7 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MPL-2.0 #include // replace with your own dependencies for generating the OpenVDB grid -#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) -#include +#include // converter from OpenVDB to NanoVDB (includes NanoVDB.h and GridManager.h) +#include extern "C" void launch_kernels(const nanovdb::NanoGrid*, const nanovdb::NanoGrid*, @@ -12,12 +12,13 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid*, /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. int main() { + using SrcGridT = openvdb::FloatGrid; try { // Create an OpenVDB grid of a sphere at the origin with radius 100 and voxel size 1. - auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); + auto srcGrid = openvdb::tools::createLevelSetSphere(100.0f, openvdb::Vec3f(0.0f), 1.0f); // Converts the OpenVDB to NanoVDB and returns a GridHandle that uses CUDA for memory management. - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
cudaStreamCreate(&stream); diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/common.h b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/common.h index 44ed1c739b..edc4c27a32 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/common.h +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/common.h @@ -3,6 +3,7 @@ #pragma once +#define _USE_MATH_DEFINES #include #include #include diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc index 9179ae758d..fad142657c 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc @@ -5,7 +5,7 @@ #include #include #include -#include +#include #if defined(NANOVDB_USE_CUDA) using BufferT = nanovdb::CudaDeviceBuffer; @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createFogVolumeSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); + handle = nanovdb::createFogVolumeSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isFogVolume() == false) { diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu index 663dcddd34..1af67e3c88 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu @@ -1,11 +1,12 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 +#define _USE_MATH_DEFINES #include #include #include -#include +#include #include #include "common.h" diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc index c2870c4dfa..aaa9aa6a63 100644 --- 
a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/openvdb.cc @@ -3,6 +3,7 @@ #if defined(NANOVDB_USE_OPENVDB) +#define _USE_MATH_DEFINES #include #include @@ -10,7 +11,7 @@ #include #include -#include +#include #include #include "common.h" @@ -92,4 +93,4 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int wid } } -#endif +#endif \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/common.h b/nanovdb/nanovdb/examples/ex_raytrace_level_set/common.h index 44ed1c739b..edc4c27a32 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/common.h +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/common.h @@ -3,6 +3,7 @@ #pragma once +#define _USE_MATH_DEFINES #include #include #include diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc b/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc index 35254bd283..5e066c20d7 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/main.cc @@ -5,7 +5,7 @@ #include #include #include -#include +#include #if defined(NANOVDB_USE_CUDA) using BufferT = nanovdb::CudaDeviceBuffer; @@ -26,7 +26,7 @@ int main(int ac, char** av) handle = nanovdb::io::readGrid(av[1]); std::cout << "Loaded NanoVDB grid[" << handle.gridMetaData()->shortGridName() << "]...\n"; } else { - handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); + handle = nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphere"); } if (handle.gridMetaData()->isLevelSet() == false) { diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu index 61e0076943..53f7bd83a5 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu 
@@ -1,14 +1,14 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 +#define _USE_MATH_DEFINES #include #include #include -#include +#include #include #include -#include #include "common.h" diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc b/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc index 4d75990d64..c8a28e60eb 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/openvdb.cc @@ -3,6 +3,7 @@ #if defined(NANOVDB_USE_OPENVDB) +#define _USE_MATH_DEFINES #include #include @@ -10,7 +11,7 @@ #include #include -#include +#include #include #include "common.h" @@ -95,4 +96,4 @@ void runOpenVDB(nanovdb::GridHandle& handle, int numI } } -#endif +#endif \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc index ca8b360a56..f1fe52e5df 100644 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this is required to read (and write) NanoVDB files on the host -#include // required for CUDA memory management +#include // required for CUDA memory management extern "C" void launch_kernels(const nanovdb::NanoGrid*, const nanovdb::NanoGrid*, diff --git a/nanovdb/nanovdb/examples/ex_util/CpuTimer.h b/nanovdb/nanovdb/examples/ex_util/CpuTimer.h deleted file mode 100644 index 5f58fd3ebc..0000000000 --- a/nanovdb/nanovdb/examples/ex_util/CpuTimer.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file CpuTimer.h -/// -/// @author Ken Museth -/// -/// @brief A simple timing 
class - -#ifndef NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED -#define NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED - -#include -#include - -namespace nanovdb { - -template -class CpuTimer -{ - std::chrono::high_resolution_clock::time_point mStart; -public: - CpuTimer() {} - void start(const std::string &msg, std::ostream& os = std::cerr) { - os << msg << " ... " << std::flush; - mStart = std::chrono::high_resolution_clock::now(); - } - void restart(const std::string &msg, std::ostream& os = std::cerr) { - this->stop(); - os << msg << " ... " << std::flush; - mStart = std::chrono::high_resolution_clock::now(); - } - void stop(std::ostream& os = std::cerr) - { - auto end = std::chrono::high_resolution_clock::now(); - auto diff = std::chrono::duration_cast(end - mStart).count(); - os << "completed in " << diff; - if (std::is_same::value) {// resolved at compile-time - os << " microseconds" << std::endl; - } else if (std::is_same::value) { - os << " milliseconds" << std::endl; - } else if (std::is_same::value) { - os << " seconds" << std::endl; - } else { - os << " unknown time unit" << std::endl; - } - } -};// CpuTimer - -} // namespace nanovdb - -#endif // NANOVDB_CPU_TIMER_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h index 33348e4dc8..98bacb538e 100644 --- a/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h +++ b/nanovdb/nanovdb/examples/ex_vox_to_nanovdb/VoxToNanoVDB.h @@ -4,6 +4,7 @@ #pragma once #include +#include #define OGT_VOX_IMPLEMENTATION #include "ogt_vox.h" @@ -28,7 +29,7 @@ inline const ogt_vox_scene* load_vox_scene(const char* filename, uint32_t scene_ uint32_t buffer_size = ftell(fp); fseek(fp, 0, SEEK_SET); uint8_t* buffer = new uint8_t[buffer_size]; - fread(buffer, buffer_size, 1, fp); + size_t bytes = fread(buffer, buffer_size, 1, fp); fclose(fp); const ogt_vox_scene* scene = ogt_vox_read_scene_with_flags(buffer, buffer_size, scene_read_flags); 
delete[] buffer; // the buffer can be safely deleted once the scene is instantiated. @@ -131,8 +132,8 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, try { if (const auto* scene = detail::load_vox_scene(inFilename.c_str())) { // we just merge into one grid... - nanovdb::GridBuilder builder; - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(nanovdb::Rgba8(),modelName,nanovdb::GridClass::VoxelVolume); + auto acc = grid.getAccessor(); auto processModelFn = [&](int modelIndex, const ogt_vox_transform& xform) { const auto* model = scene->models[modelIndex]; @@ -143,7 +144,7 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, for (uint32_t x = 0; x < model->size_x; ++x, ++voxel_index) { if (uint8_t color_index = model->voxel_data[voxel_index]) { ogt_vox_rgba rgba = scene->palette.color[color_index]; - auto ijk = nanovdb::Coord::Floor(detail::matMult4x4((float*)&xform, nanovdb::Vec4f(x, y, z, 1))); + auto ijk = nanovdb::Coord::Floor(detail::matMult4x4((float*)&xform, nanovdb::Vec4f(x, y, z, 1))); acc.setValue(nanovdb::Coord(ijk[0], ijk[2], -ijk[1]), *reinterpret_cast(&rgba)); } } @@ -184,8 +185,7 @@ nanovdb::GridHandle convertVoxToNanoVDB(const std::string& inFilename, printf("scene processing end.\n"); ogt_vox_destroy_scene(scene); - builder.setGridClass(nanovdb::GridClass::VoxelVolume); - return builder.getHandle<>(1.0f, nanovdb::Vec3d(0), modelName); + return nanovdb::createNanoGrid(grid); } else { std::ostringstream ss; ss << "Invalid file \"" << inFilename << "\""; diff --git a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu new file mode 100644 index 0000000000..44fb25407a --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu @@ -0,0 +1,53 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#include + +/// @brief Demonstrates 
how to create a NanoVDB grid from voxel coordinates on the GPU +int main() +{ + using namespace nanovdb; + + try { + // Define list of voxel coordinates and copy them to the device + const size_t numVoxels = 3; + Coord coords[numVoxels] = {Coord(1, 2, 3), Coord(-1,3,6), Coord(-90,100,5678)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, numVoxels * sizeof(Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, numVoxels * sizeof(Coord), cudaMemcpyHostToDevice));// coords CPU -> GPU + + // Generate a NanoVDB grid that contains the list of voxels on the device + auto handle = cudaVoxelsToGrid(d_coords, numVoxels); + auto *grid = handle.deviceGrid(); + + // Define a list of values and copy them to the device + float values[numVoxels] = {1.4f, 6.7f, -5.0f}, *d_values; + cudaCheck(cudaMalloc(&d_values, numVoxels * sizeof(float))); + cudaCheck(cudaMemcpy(d_values, values, numVoxels * sizeof(float), cudaMemcpyHostToDevice));// values CPU -> GPU + + // Launch a device kernel that sets the values of voxels define above and prints them + const unsigned int numThreads = 128, numBlocks = (numVoxels + numThreads - 1) / numThreads; + cudaLambdaKernel<<>>(numVoxels, [=] __device__(size_t tid) { + using OpT = SetVoxel;// defined to type of random-access operation (set value) + const Coord &ijk = d_coords[tid]; + grid->tree().set(ijk, d_values[tid]); + printf("GPU: voxel # %lu, grid(%4i,%4i,%4i) = %5.1f\n", tid, ijk[0], ijk[1], ijk[2], grid->tree().getValue(ijk)); + }); cudaCheckError(); + + // Copy grid from GPU to CPU and print the voxel values + handle.deviceDownload();// creates a copy on the CPU + grid = handle.grid(); + for (size_t i=0; itree().getValue(ijk)); + } + + // free arrays allocated on the device + cudaCheck(cudaFree(d_coords)); + cudaCheck(cudaFree(d_values)); + } + catch (const std::exception& e) { + std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; + } + + return 0; +} \ No newline at end of file diff --git 
a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc index 0722fcf6e2..f883b3aded 100644 --- a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc +++ b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc @@ -10,7 +10,7 @@ int main() { try { - std::vector> handles; + std::vector> handles; // Create multiple NanoVDB grids of various types handles.push_back(nanovdb::createLevelSetSphere(100.0f)); handles.push_back(nanovdb::createLevelSetTorus(100.0f, 50.0f)); diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 22558626c5..9d68937d3c 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include @@ -29,8 +29,7 @@ #include #include #include -#include -#include "../examples/ex_util/CpuTimer.h" +#include #if !defined(_MSC_VER) // does not compile in msvc c++ due to zero-sized arrays. 
#include @@ -44,50 +43,21 @@ #include - -namespace nanovdb {// this namespace is required by gtest - -std::ostream& -operator<<(std::ostream& os, const Coord& ijk) -{ - os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; - return os; -} - -std::ostream& -operator<<(std::ostream& os, const CoordBBox& b) -{ - os << b[0] << " -> " << b[1]; - return os; -} - -template -std::ostream& -operator<<(std::ostream& os, const Vec3& v) -{ - os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; - return os; -} -}// namespace nanovdb - namespace { template struct Sphere { - Sphere(const nanovdb::Vec3& center, - ValueT radius, - ValueT voxelSize = 1.0, - ValueT halfWidth = 3.0) + Sphere(const nanovdb::Vec3d& center, + double radius, + double voxelSize = 1.0, + double halfWidth = 3.0) : mCenter(center) , mRadius(radius) , mVoxelSize(voxelSize) , mBackground(voxelSize * halfWidth) { } - ValueT background() const { return mBackground; } - - /// @brief Only method required by GridBuilder ValueT operator()(const nanovdb::Coord& ijk) const { const ValueT dst = this->sdf(ijk); @@ -161,7 +131,7 @@ class TestNanoVDB : public ::testing::Test const auto n = sizeof(T); std::cerr << "Size of " << s << ": " << n << " bytes which is" << (n % 32 == 0 ? 
" " : " NOT ") << "32 byte aligned" << std::endl; } - nanovdb::CpuTimer<> mTimer; + nanovdb::CpuTimer mTimer; }; // TestNanoVDB template @@ -195,7 +165,13 @@ using MyTypes = ::testing::Types; @@ -332,6 +308,37 @@ TEST_F(TestNanoVDB, Basic) } } +TEST_F(TestNanoVDB, toStr) +{ + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Unknown ), "?"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Float ), "float"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Double ), "double"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Int16 ), "int16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Int32 ), "int32"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Int64 ), "int64"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec3f ), "Vec3f"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec3d ), "Vec3d"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Mask ), "Mask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Half ), "Half"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::UInt32 ), "uint32"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Boolean ), "bool"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::RGBA8 ), "RGBA8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Fp4 ), "Float4"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Fp8 ), "Float8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Fp16 ), "Float16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::FpN ), "FloatN"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec4f ), "Vec4f"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec4d ), "Vec4d"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Index ), "Index"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::OnIndex ), "OnIndex"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::IndexMask 
), "IndexMask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::OnIndexMask ), "OnIndexMask"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::PointIndex ), "PointIndex"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec3u8 ), "Vec3u8"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::Vec3u16 ), "Vec3u16"), 0 ); + EXPECT_EQ( strcmp(nanovdb::toStr( nanovdb::GridType::End ), "End"), 0 ); +} + TEST_F(TestNanoVDB, Assumptions) { struct A @@ -642,6 +649,53 @@ TEST_F(TestNanoVDB, reduce) } } +TEST_F(TestNanoVDB, prefixSum) +{ + const uint64_t size = 50000000;// test on fifty million elements + {// multi-threaded inclusive prefix sum + std::vector array(size); + EXPECT_EQ(size, array.size()); + uint64_t sum = 0; + for (uint64_t i=0; i array(size); + EXPECT_EQ(size, array.size()); + uint64_t sum = 0; + for (uint64_t i=0; i::value; EXPECT_TRUE(test); } - {// ValueIndex + {// ValueIndex using A = typename nanovdb::BuildToValueMap::Type; bool test = nanovdb::is_same::value; EXPECT_TRUE(test); @@ -702,6 +756,47 @@ TEST_F(TestNanoVDB, Traits) test = nanovdb::is_same::value; EXPECT_TRUE(test); } + {// nanovdb::BuildTraits + bool test = nanovdb::BuildTraits::is_index; + EXPECT_FALSE(test); + test = nanovdb::BuildTraits::is_index; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_index; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_index; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_index; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_Fp; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_Fp; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_Fp; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_Fp; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_FpX; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_FpX; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_FpX; + EXPECT_TRUE(test); + test = nanovdb::BuildTraits::is_FpX; + EXPECT_FALSE(test); + } + {// 
nanovdb::is_specialization + bool test = nanovdb::is_specialization,nanovdb::Vec3>::value; + EXPECT_TRUE(test); + test = nanovdb::is_specialization::value; + EXPECT_TRUE(test); + test = nanovdb::is_specialization::value; + EXPECT_FALSE(test); + using VecT = std::vector; + test = nanovdb::is_specialization::value; + EXPECT_TRUE(test); + test = nanovdb::is_specialization::value; + EXPECT_FALSE(test); + } } TEST_F(TestNanoVDB, Rgba8) @@ -938,6 +1033,13 @@ TEST_F(TestNanoVDB, CoordBBox) } EXPECT_FALSE(iter); } + + {// test CoordBBox::createCube + EXPECT_EQ(nanovdb::Coord(-7,-7,-7), nanovdb::CoordBBox::createCube(nanovdb::Coord(-7), 8).min()); + EXPECT_EQ(nanovdb::Coord( 0, 0, 0), nanovdb::CoordBBox::createCube(nanovdb::Coord(-7), 8).max()); + EXPECT_EQ(nanovdb::Coord(-7,-7,-7), nanovdb::CoordBBox::createCube(-7, 0).min()); + EXPECT_EQ(nanovdb::Coord( 0, 0, 0), nanovdb::CoordBBox::createCube(-7, 0).max()); + } } TEST_F(TestNanoVDB, Vec3) @@ -946,19 +1048,19 @@ TEST_F(TestNanoVDB, Vec3) EXPECT_FALSE(test); test = nanovdb::TensorTraits::IsVector; EXPECT_FALSE(test); - test = nanovdb::is_specialization::value; + test = nanovdb::is_specialization::value; EXPECT_TRUE(test); - test = nanovdb::is_same::value; + test = nanovdb::is_same::value; EXPECT_TRUE(test); - test = nanovdb::TensorTraits::IsVector; + test = nanovdb::TensorTraits::IsVector; EXPECT_TRUE(test); - test = nanovdb::is_same::ElementType>::value; + test = nanovdb::is_same::ElementType>::value; EXPECT_TRUE(test); - test = nanovdb::is_same::FloatType>::value; + test = nanovdb::is_same::FloatType>::value; EXPECT_TRUE(test); - EXPECT_EQ(size_t(3 * 8), sizeof(nanovdb::Vec3R)); + EXPECT_EQ(size_t(3 * 8), sizeof(nanovdb::Vec3d)); - nanovdb::Vec3R xyz(1.0, 2.0, 3.0); + nanovdb::Vec3d xyz(1.0, 2.0, 3.0); EXPECT_EQ(1.0, xyz[0]); EXPECT_EQ(2.0, xyz[1]); EXPECT_EQ(3.0, xyz[2]); @@ -992,7 +1094,7 @@ TEST_F(TestNanoVDB, Vec4) EXPECT_TRUE(test); int rank = nanovdb::TensorTraits::Rank; EXPECT_EQ(0, rank); - rank = 
nanovdb::TensorTraits::Rank; + rank = nanovdb::TensorTraits::Rank; EXPECT_EQ(1, rank); test = nanovdb::is_same::FloatType>::value; EXPECT_FALSE(test); @@ -1004,11 +1106,11 @@ TEST_F(TestNanoVDB, Vec4) EXPECT_TRUE(test); test = nanovdb::is_specialization::value; EXPECT_TRUE(test); - test = nanovdb::is_specialization::value; + test = nanovdb::is_specialization::value; EXPECT_FALSE(test); test = nanovdb::is_same::value; EXPECT_TRUE(test); - test = nanovdb::TensorTraits::IsVector; + test = nanovdb::TensorTraits::IsVector; EXPECT_TRUE(test); test = nanovdb::is_same::ElementType>::value; EXPECT_TRUE(test); @@ -1041,6 +1143,20 @@ TEST_F(TestNanoVDB, Vec4) EXPECT_NE(nanovdb::Vec4f(1, 2, 3, 4), nanovdb::Vec4f(1, 2, 3, 5)); }// Vec4 +TEST_F(TestNanoVDB, Map) +{ + EXPECT_EQ(264u, sizeof(nanovdb::Map)); + nanovdb::Map map1, map2; + EXPECT_EQ(nanovdb::Vec3d(1.0), map1.getVoxelSize()); + map1.set(1.0, nanovdb::Vec3d(0.0)); + EXPECT_EQ(nanovdb::Vec3d(1.0), map1.getVoxelSize()); + map2.set(2.0, nanovdb::Vec3d(0.0)); + EXPECT_EQ(nanovdb::Vec3d(2.0), map2.getVoxelSize()); + map1 = map2;// default assignment operator + EXPECT_EQ(nanovdb::Vec3d(2.0), map2.getVoxelSize()); + EXPECT_EQ(nanovdb::Vec3d(2.0), map1.getVoxelSize()); +}// Map + TEST_F(TestNanoVDB, Extrema) { { // int @@ -1159,7 +1275,7 @@ TEST_F(TestNanoVDB, Ray) EXPECT_EQ(ray(2.0)[1], 5); //higher y component of intersection ray.reset(eye, dir, t0, t1); - // intersects the lower edge anlong the z-axis of the box + // intersects the lower edge along the z-axis of the box EXPECT_TRUE(ray.clip(BBoxT(Vec3T(1.5, 2.0, 2.0), Vec3T(4.5, 4.0, 6.0)))); //std::cerr << ray(0.5) << ", " << ray(2.0) << std::endl; EXPECT_EQ(0.5, ray.t0()); @@ -1307,23 +1423,29 @@ TEST_F(TestNanoVDB, Mask) EXPECT_EQ(size_t(8 * 8), MaskT::memUsage()); MaskT mask; - EXPECT_EQ(0U, mask.countOn()); + EXPECT_EQ(0u, mask.countOn()); EXPECT_TRUE(mask.isOff()); EXPECT_FALSE(mask.isOn()); EXPECT_FALSE(mask.beginOn()); - for (uint32_t i = 0; i < MaskT::bitCount(); 
++i) { + for (uint32_t i=0u; i(i)); + EXPECT_EQ(512u, mask.findPrev(i)); + EXPECT_EQ(i<512u ? i : 512u, mask.findNext(i)); + EXPECT_EQ(i<512u ? i : 512u, mask.findPrev(i)); + } + mask.setOn(256u); EXPECT_FALSE(mask.isOff()); EXPECT_FALSE(mask.isOn()); auto iter = mask.beginOn(); EXPECT_TRUE(iter); - EXPECT_EQ(256U, *iter); + EXPECT_EQ(256u, *iter); EXPECT_FALSE(++iter); - for (uint32_t i = 0; i < MaskT::bitCount(); ++i) { - if (i != 256) { + for (uint32_t i=0u; i(i)); + EXPECT_EQ(i<256u || i>=512u ? 512u : 256u, mask.findPrev(i)); + EXPECT_EQ(i==256u ? 257u : i<512u ? i : 512u, mask.findNext(i)); + EXPECT_EQ(i==256u ? 255u : i<512u ? i : 512u, mask.findPrev(i)); + } - mask.set(256, false); + mask.set(256u, false); EXPECT_TRUE(mask.isOff()); EXPECT_FALSE(mask.isOn()); - EXPECT_FALSE(mask.isOn(256)); + EXPECT_FALSE(mask.isOn(256u)); - mask.set(256, true); + mask.set(256u, true); EXPECT_FALSE(mask.isOff()); EXPECT_FALSE(mask.isOn()); - EXPECT_TRUE(mask.isOn(256)); + EXPECT_TRUE(mask.isOn(256u)); EXPECT_EQ(1u, mask.countOn()); - for (int i=0; i<512; ++i) EXPECT_EQ(i<=256 ? 0u : 1u, mask.countOn(i)); + for (int i=0u; i<512u; ++i) EXPECT_EQ(i<=256u ? 0u : 1u, mask.countOn(i)); mask.setOn(); EXPECT_EQ(512u, mask.countOn()); - for (uint32_t i=0; i<512; ++i) EXPECT_EQ(i, mask.countOn(i)); + for (uint32_t i=0; i<512u; ++i) EXPECT_EQ(i, mask.countOn(i)); + for (uint32_t i=0; i<1000u; ++i) { + EXPECT_EQ(i<512u ? i : 512u, mask.findNext(i)); + EXPECT_EQ(i<512u ? 
i : 512u, mask.findPrev(i)); + EXPECT_EQ(512u, mask.findNext(i)); + EXPECT_EQ(512u, mask.findPrev(i)); + } mask.setOff(); EXPECT_TRUE(mask.isOff()); - mask.setOn(7); - mask.setOn(123); + mask.setOn(7u); + mask.setOn(123u); EXPECT_FALSE(mask.isOn()); auto it1 = mask.beginOff(); EXPECT_TRUE(it1); - EXPECT_EQ(0, *it1); + EXPECT_EQ(0u, *it1); EXPECT_TRUE(++it1); - EXPECT_EQ(1, *it1); + EXPECT_EQ(1u, *it1); EXPECT_TRUE(++it1); - EXPECT_EQ(2, *it1); + EXPECT_EQ(2u, *it1); auto it2 = mask.beginOn(); EXPECT_TRUE(it2); - EXPECT_EQ(7, *it2); + EXPECT_EQ(7u, *it2); EXPECT_TRUE(++it2); - EXPECT_EQ(123, *it2); + EXPECT_EQ(123u, *it2); EXPECT_FALSE(++it2); } @@ -1424,10 +1558,11 @@ TEST_F(TestNanoVDB, LeafNode) EXPECT_EQ(8u, data.mValueMask.wordCount()); nanovdb::CoordBBox bbox(nanovdb::Coord(-1), nanovdb::Coord(-1)); - uint64_t word = 0u; + uint64_t word = 0u; + const uint64_t *w = data.mValueMask.words(); for (int i = 0; i < 8; ++i) { - if (uint64_t w = data.mValueMask.getWord(i)) { - word |= w; + if (w[i]) { + word |= w[i]; if (bbox[0][0] == -1) bbox[0][0] = i; bbox[1][0] = i; @@ -1467,10 +1602,11 @@ TEST_F(TestNanoVDB, LeafNode) auto localBBox = [](const LeafT* leaf) { // static_assert(8u == LeafT::dim(), "Expected dim = 8"); nanovdb::CoordBBox bbox(nanovdb::Coord(-1, 0, 0), nanovdb::Coord(-1, 7, 7)); - uint64_t word64 = 0u; + uint64_t word64 = 0u; + const uint64_t *w = leaf->valueMask().words(); for (int i = 0; i < 8; ++i) { - if (uint64_t w = leaf->valueMask().getWord(i)) { - word64 |= w; + if (w[i]) { + word64 |= w[i]; if (bbox[0][0] == -1) bbox[0][0] = i; // only set once bbox[1][0] = i; @@ -1503,8 +1639,22 @@ TEST_F(TestNanoVDB, LeafNode) EXPECT_EQ(bbox[1], max); } + { // test LeafNode::updateBBox + leaf->data()->mValueMask.setOff(); + leaf->data()->mBBoxMin = nanovdb::Coord(0); + const nanovdb::Coord min(1, 2, 3); + leaf->setValue(min, 1.0f); + EXPECT_EQ(1.0f, leaf->getValue(min)); + leaf->updateBBox(); + const auto bbox = leaf->bbox(); + //std::cerr << "bbox = " << 
bbox << std::endl; + EXPECT_EQ(bbox[0], min); + EXPECT_EQ(bbox[1], min); + } + { // test LeafNode::updateBBox leaf->data()->mValueMask.setOff(); + leaf->data()->mBBoxMin = nanovdb::Coord(0); const nanovdb::Coord min(1, 2, 3), max(5, 6, 7); leaf->setValue(min, 1.0f); leaf->setValue(max, 2.0f); @@ -1789,6 +1939,7 @@ TEST_F(TestNanoVDB, RootNode) EXPECT_EQ(0u, root->tileCount()); EXPECT_EQ(nanovdb::AlignUp(sizeof(nanovdb::CoordBBox) + sizeof(uint32_t) + (5 * sizeof(float))), root->memUsage()); // background, min, max, tileCount + bbox EXPECT_EQ(1.234f, root->getValue(CoordT(1, 2, 3))); + EXPECT_EQ(1.234f, root->getValue(1, 2, 3)); {// examine padding of RootNode //std::cerr << "sizeof(Coord) = " << sizeof(nanovdb::Coord) << " bytes\n"; @@ -1835,6 +1986,35 @@ TEST_F(TestNanoVDB, RootNode) TEST_F(TestNanoVDB, Offsets) { + {// check GridBlindMetaData + /* + static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less! + int64_t mDataOffset; // byte offset to the blind data, relative to the GridData. + uint64_t mValueCount; // number of elements, e.g. point count + uint32_t mFlags; // flags + GridBlindDataSemantic mSemantic; // semantic meaning of the data. 
+ GridBlindDataClass mDataClass; // 4 bytes + GridType mDataType; // 4 bytes + char mName[MaxNameSize];// note this includes the NULL termination + */ + int offset = 0; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataOffset), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueCount), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueSize), offset); + offset += 4; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mSemantic), offset); + offset += 4; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataClass), offset); + offset += 4; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataType), offset); + offset += 4; + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mName), offset); + offset += 256; + //std::cerr << "offset = " << offset << " sizeof() = " << sizeof(nanovdb::GridBlindMetaData) << std::endl; + EXPECT_EQ(offset, sizeof(nanovdb::GridBlindMetaData)); + } { // check GridData memory alignment, total 672 bytes /* static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less @@ -1847,8 +2027,8 @@ TEST_F(TestNanoVDB, Offsets) uint64_t mGridSize; // 8B. byte count of this entire grid occupied in the buffer. char mGridName[MaxNameSize]; // 256B Map mMap; // 264B. affine transformation between index and world space in both single and double precision - BBox mWorldBBox; // 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) - Vec3R mVoxelSize; // 24B. size of a voxel in world units + BBox mWorldBBox; // 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3d mVoxelSize; // 24B. size of a voxel in world units GridClass mGridClass; // 4B. GridType mGridType; // 4B. int64_t mBlindMetadataOffset; // 8B. offset of GridBlindMetaData structures that follow this grid. 
@@ -1906,18 +2086,18 @@ TEST_F(TestNanoVDB, Offsets) uint64_t mVoxelCount;// 8B, total number of active voxels in the root and all its child nodes. */ int offset = 0; - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData<>, mNodeOffset), offset); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData, mNodeOffset), offset); offset += 4*8; - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData<>, mNodeCount), offset); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData, mNodeCount), offset); offset += 12; - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData<>, mTileCount), offset); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData, mTileCount), offset); offset += 12; - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData<>, mVoxelCount), offset); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::TreeData, mVoxelCount), offset); offset += 8; //std::cerr << "TreeData padding at end = " << (nanovdb::AlignUp(offset)-offset) << std::endl; offset = nanovdb::AlignUp(offset); //std::cerr << "TreeData: Offset = " << offset << std::endl; - EXPECT_EQ(offset, (int)sizeof(nanovdb::TreeData<>)); + EXPECT_EQ(offset, (int)sizeof(nanovdb::TreeData)); } } @@ -2156,10 +2336,44 @@ template<> void checkLeaf(int &offset) { using DataT = typename nanovdb::LeafNode::DataType; - EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mStatsOff), offset); + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPrefixSum), offset); offset += 8; - EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mValueOff), offset); +} + +template<> +void checkLeaf(int &offset) +{ + using DataT = typename nanovdb::LeafNode::DataType; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPrefixSum), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mMask), offset); + offset += 64; +} + +template<> +void checkLeaf(int &offset) +{ + using DataT = typename nanovdb::LeafNode::DataType; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, 
mPrefixSum), offset); + offset += 8; +} + +template<> +void checkLeaf(int &offset) +{ + using DataT = typename nanovdb::LeafNode::DataType; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPrefixSum), offset); + offset += 8; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mMask), offset); + offset += 64; } template<> @@ -2244,6 +2458,19 @@ void checkLeaf(int &offset) offset = nanovdb::AlignUp<32>(offset); } +template<> +void checkLeaf(int &offset) +{ + using DataT = typename nanovdb::LeafNode::DataType; + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); + offset += sizeof(uint64_t); + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPointCount), offset); + offset += sizeof(uint64_t); + offset = nanovdb::AlignUp<32>(offset); + EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mValues), offset); + offset += (8*8*8)*sizeof(uint16_t); +} + TEST_F(TestNanoVDB, BasicGrid) { using LeafT = nanovdb::LeafNode; @@ -2390,26 +2617,31 @@ TEST_F(TestNanoVDB, BasicGrid) EXPECT_DOUBLE_EQ(expected, sum); } } - - data->setFlagsOff(); +#if 1 + nanovdb::Map map; + map.set(mat, invMat); + data->init({nanovdb::GridFlags::HasMinMax, nanovdb::GridFlags::IsBreadthFirst}, bytes[5], map, nanovdb::GridType::Float); +#else + data-> setFlagsOff(); data->setMinMaxOn(); data->mGridIndex = 0; data->mGridCount = 1; data->mBlindMetadataOffset = 0; data->mBlindMetadataCount = 0; - data->mVoxelSize = nanovdb::Vec3R(dx); + data->mVoxelSize = nanovdb::Vec3d(dx); data->mMap.set(mat, invMat, 1.0); data->mGridClass = nanovdb::GridClass::Unknown; data->mGridType = nanovdb::GridType::Float; data->mMagic = NANOVDB_MAGIC_NUMBER; data->mVersion = nanovdb::Version(); +#endif memcpy(data->mGridName, name.c_str(), name.size() + 1); } EXPECT_EQ(tree, &grid->tree()); - const nanovdb::Vec3R p1(1.0, 2.0, 3.0); + const nanovdb::Vec3d p1(1.0, 2.0, 3.0); const auto p2 = grid->worldToIndex(p1); - EXPECT_EQ(nanovdb::Vec3R(0.5, 1.0, 1.5), p2); + EXPECT_EQ(nanovdb::Vec3d(0.5, 1.0, 1.5), p2); const auto p3 = 
grid->indexToWorld(p2); EXPECT_EQ(p1, p3); { @@ -2434,7 +2666,7 @@ TEST_F(TestNanoVDB, BasicGrid) EXPECT_DOUBLE_EQ(expected, sum); } } - data->mVoxelSize = nanovdb::Vec3R(dx); + data->mVoxelSize = nanovdb::Vec3d(dx); data->mMap.set(mat, invMat, 1.0); } @@ -2442,7 +2674,7 @@ TEST_F(TestNanoVDB, BasicGrid) // Start actual tests auto const p4 = grid->worldToIndex(p3); - EXPECT_EQ(nanovdb::Vec3R(0.0, 0.0, 0.0), p4); + EXPECT_EQ(nanovdb::Vec3d(0.0, 0.0, 0.0), p4); const auto p5 = grid->indexToWorld(p4); EXPECT_EQ(p1, p5); } @@ -2566,9 +2798,10 @@ TEST_F(TestNanoVDB, BasicGrid) TEST_F(TestNanoVDB, GridBuilderEmpty) { { // empty grid - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f, "test"); + auto srcAcc = srcGrid.getAccessor(); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2587,22 +2820,68 @@ TEST_F(TestNanoVDB, GridBuilderEmpty) EXPECT_EQ(0.0f, srcAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(0.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); + EXPECT_EQ(dstGrid->tree().root().minimum(), 0.0f); EXPECT_EQ(dstGrid->tree().root().maximum(), 0.0f); EXPECT_EQ(dstGrid->tree().root().average(), 0.0f); + EXPECT_EQ(dstGrid->tree().root().variance(), 0.0f); EXPECT_EQ(dstGrid->tree().root().stdDeviation(), 0.0f); } } // GridBuilderEmpty -TEST_F(TestNanoVDB, GridBuilderBasic1) +TEST_F(TestNanoVDB, BuilderGridEmpty) +{ + { // empty grid + using SrcGridT = nanovdb::build::Grid; + SrcGridT grid(0.0f, "test"); + auto srcAcc = grid.getAccessor(); + auto handle = nanovdb::createNanoGrid(grid); + EXPECT_TRUE(handle); + auto* meta = handle.gridMetaData(); + EXPECT_TRUE(meta); + EXPECT_TRUE(meta->isEmpty()); + EXPECT_EQ("test", std::string(meta->shortGridName())); + 
EXPECT_EQ(nanovdb::GridType::Float, meta->gridType()); + EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); + EXPECT_EQ(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER), meta->version().getMajor()); + EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); + EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); + auto* dstGrid = handle.grid(); + EXPECT_TRUE(dstGrid); + EXPECT_EQ("test", std::string(dstGrid->gridName())); + EXPECT_EQ(0u, dstGrid->activeVoxelCount()); + auto dstAcc = dstGrid->getAccessor(); + EXPECT_EQ(0.0f, srcAcc.getValue(nanovdb::Coord(1, 2, 3))); + EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(1, 2, 3))); + EXPECT_EQ(0.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); + + EXPECT_EQ(dstGrid->tree().root().minimum(), 0.0f); + EXPECT_EQ(dstGrid->tree().root().maximum(), 0.0f); + EXPECT_EQ(dstGrid->tree().root().average(), 0.0f); + + EXPECT_EQ(dstGrid->tree().root().variance(), 0.0f); + EXPECT_EQ(dstGrid->tree().root().stdDeviation(), 0.0f); + } +} // BuilderGridEmpty + +// make -j 6 testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*CreateNanoGrid_Basic1" --gtest_break_on_failure +TEST_F(TestNanoVDB, CreateNanoGrid_Basic1) { { // 1 grid point - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); - srcAcc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); - EXPECT_EQ(1.0f, srcAcc.getValue(nanovdb::Coord(1, 2, 3))); - auto handle = builder.getHandle<>(); + using SrcGridT = nanovdb::build::Grid; + const nanovdb::Coord ijk(1,2,3); + SrcGridT grid(0.0f); + auto srcAcc = grid.getAccessor(); + srcAcc.setValue(ijk, 1.0f); + auto nodeCount = grid.nodeCount(); + EXPECT_EQ(1u, nodeCount[0]); + EXPECT_EQ(1u, nodeCount[1]); + EXPECT_EQ(1u, nodeCount[2]); + EXPECT_EQ(1.0f, srcAcc.getValue(ijk)); + EXPECT_EQ(1.0f, srcAcc.getValue(1,2,3)); + + auto handle = nanovdb::createNanoGrid(grid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2616,11 +2895,12 @@ TEST_F(TestNanoVDB, 
GridBuilderBasic1) auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ("", std::string(dstGrid->gridName())); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); - //EXPECT_EQ(1u, dstGrid->activeVoxelCount()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); + EXPECT_EQ(1u, dstGrid->activeVoxelCount()); + EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); auto dstAcc = dstGrid->getAccessor(); - EXPECT_EQ(1.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); - EXPECT_TRUE(srcAcc.isActive(nanovdb::Coord(1, 2, 3))); + EXPECT_EQ(1.0f, dstAcc.getValue(ijk)); + EXPECT_TRUE(srcAcc.isActive(ijk)); EXPECT_EQ(nanovdb::Coord(1, 2, 3), dstGrid->indexBBox()[0]); EXPECT_EQ(nanovdb::Coord(1, 2, 3), dstGrid->indexBBox()[1]); EXPECT_EQ(dstGrid->tree().root().minimum(), 1.0f);// minimum active value @@ -2631,15 +2911,130 @@ TEST_F(TestNanoVDB, GridBuilderBasic1) } } // GridBuilderBasic1 +TEST_F(TestNanoVDB, CreateNanoGrid_Tile) +{ + { // 1 grid point and 1 tile + using SrcGridT = nanovdb::build::Grid; + const nanovdb::Coord ijk(1,2,3); + SrcGridT grid(0.0f); + auto srcAcc = grid.getAccessor(); + srcAcc.setValue(ijk, 1.0f); + + const nanovdb::Coord ijk2(-1,-2,-3); + grid.tree().root().addTile<1>(ijk2, 2.0f, true); + + auto nodeCount = grid.nodeCount(); + EXPECT_EQ(1u, nodeCount[0]); + EXPECT_EQ(2u, nodeCount[1]); + EXPECT_EQ(2u, nodeCount[2]); + EXPECT_EQ(1.0f, srcAcc.getValue(ijk)); + EXPECT_EQ(1.0f, srcAcc.getValue(1,2,3)); + EXPECT_EQ(2.0f, srcAcc.getValue(ijk2)); + EXPECT_EQ(2.0f, srcAcc.getValue(-1,-2,-3)); + + auto handle = nanovdb::createNanoGrid(grid); + EXPECT_TRUE(handle); + auto* meta = handle.gridMetaData(); + EXPECT_TRUE(meta); + EXPECT_FALSE(meta->isEmpty()); + EXPECT_EQ(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER), meta->version().getMajor()); + EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); + EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); + EXPECT_EQ("", std::string(meta->shortGridName())); + 
EXPECT_EQ(nanovdb::GridType::Float, meta->gridType()); + EXPECT_EQ(nanovdb::GridClass::Unknown, meta->gridClass()); + auto* dstGrid = handle.grid(); + EXPECT_TRUE(dstGrid); + EXPECT_EQ("", std::string(dstGrid->gridName())); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); + EXPECT_EQ(128u * 128u * 128u + 1u, dstGrid->activeVoxelCount()); + EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + auto dstAcc = dstGrid->getAccessor(); + EXPECT_EQ(1.0f, dstAcc.getValue(ijk)); + EXPECT_EQ(2.0f, dstAcc.getValue(ijk2)); + EXPECT_TRUE(srcAcc.isActive(ijk)); + EXPECT_EQ(nanovdb::Coord(-128, -128, -128), dstGrid->indexBBox()[0]); + EXPECT_EQ(nanovdb::Coord(1, 2, 3), dstGrid->indexBBox()[1]); + EXPECT_EQ(dstGrid->tree().root().minimum(), 1.0f);// minimum active value + EXPECT_EQ(dstGrid->tree().root().maximum(), 2.0f);// maximum active value + EXPECT_NEAR(dstGrid->tree().root().average(), 1.999999f, 1e-6);// 1 of 1.0 and 128*128*128 of 2.0 + EXPECT_NEAR(dstGrid->tree().root().variance(), 0.0f,1e-6); + EXPECT_NEAR(dstGrid->tree().root().stdDeviation(), 0.00069f, 1e-6); + } +} // GridBuilderTile + +TEST_F(TestNanoVDB, GridBuilderValueMask) +{ + { // 1 grid point + using SrcGridT = nanovdb::build::Grid; + const nanovdb::Coord ijk(1,2,3); + SrcGridT grid(false); + auto srcAcc = grid.getAccessor(); + srcAcc.setValue(ijk, true); + auto nodeCount = grid.nodeCount(); + EXPECT_EQ(1u, nodeCount[0]); + EXPECT_EQ(1u, nodeCount[1]); + EXPECT_EQ(1u, nodeCount[2]); + EXPECT_EQ(true, srcAcc.getValue(ijk)); + auto handle = nanovdb::createNanoGrid(grid); + EXPECT_TRUE(handle); + auto* meta = handle.gridMetaData(); + EXPECT_TRUE(meta); + EXPECT_FALSE(meta->isEmpty()); + EXPECT_EQ(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER), meta->version().getMajor()); + EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); + EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); + EXPECT_EQ("", std::string(meta->shortGridName())); + 
EXPECT_EQ(nanovdb::GridType::Mask, meta->gridType()); + EXPECT_EQ(nanovdb::GridClass::Topology, meta->gridClass()); + auto* dstGrid = handle.grid(); + EXPECT_TRUE(dstGrid); + EXPECT_EQ("", std::string(dstGrid->gridName())); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); + EXPECT_EQ(1u, dstGrid->activeVoxelCount()); + auto dstAcc = dstGrid->getAccessor(); + EXPECT_EQ(false, dstAcc.getValue(nanovdb::Coord(1, 2, 2))); + EXPECT_EQ(true, dstAcc.getValue(ijk)); + EXPECT_EQ(false, dstAcc.getValue(nanovdb::Coord(0, 2, 2))); + EXPECT_TRUE( srcAcc.isActive(ijk)); + EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(2, 2, 3))); + EXPECT_EQ(ijk, dstGrid->indexBBox()[0]); + EXPECT_EQ(ijk, dstGrid->indexBBox()[1]); + //EXPECT_EQ(dstGrid->tree().root().minimum(), false);// minimum active value + //EXPECT_EQ(dstGrid->tree().root().maximum(), true);// maximum active value + //EXPECT_NEAR(dstGrid->tree().root().average(), 1.0f, 1e-6); + //EXPECT_NEAR(dstGrid->tree().root().variance(), 0.0f,1e-6); + //EXPECT_NEAR(dstGrid->tree().root().stdDeviation(), 0.0f, 1e-6); + } +} // GridBuilderValueMask + TEST_F(TestNanoVDB, GridBuilderBasic2) { { // 2 grid points - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); - srcAcc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); - srcAcc.setValue(nanovdb::Coord(2, -2, 9),-1.0f); - //srcAcc.setValue(nanovdb::Coord(20,-20,90), 0.0f);// same as background - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + using SrcGridT = nanovdb::build::Grid; + SrcGridT grid(0.0f, "test"); + auto srcAcc = grid.getAccessor(); + const nanovdb::Coord ijk1(1,2,3), ijk2(2,-2,9); + srcAcc.setValue(ijk1, 1.0f); + srcAcc.setValue(ijk2, -1.0f); + EXPECT_EQ( 1.0f, srcAcc.getValue(ijk1)); + EXPECT_EQ(-1.0f, srcAcc.getValue(ijk2)); + auto nodeCount = grid.nodeCount(); + EXPECT_EQ(2u, nodeCount[0]); + EXPECT_EQ(2u, nodeCount[1]); + EXPECT_EQ(2u, nodeCount[2]); + + nanovdb::build::NodeManager srcMgr(grid); + EXPECT_EQ(2u, 
srcMgr.nodeCount(0)); + EXPECT_EQ(2u, srcMgr.nodeCount(1)); + EXPECT_EQ(2u, srcMgr.nodeCount(2)); + EXPECT_EQ(-1.0f, srcMgr.node<0>(0).getValue(ijk2)); + EXPECT_EQ( 1.0f, srcMgr.node<0>(1).getValue(ijk1)); + //for (int i=0;igridName())); EXPECT_EQ(2u, dstGrid->activeVoxelCount()); + EXPECT_EQ(2u, dstGrid->tree().nodeCount(0)); + EXPECT_EQ(2u, dstGrid->tree().nodeCount(1)); + EXPECT_EQ(2u, dstGrid->tree().nodeCount(2)); + auto *dstLeaf = dstGrid->tree().getFirstNode<0>(); + EXPECT_EQ(1u, (dstLeaf+0)->getValueMask().countOn()); + EXPECT_EQ(1u, (dstLeaf+1)->getValueMask().countOn()); + EXPECT_EQ(-1.0f, (dstLeaf+0)->getValue(ijk2)); + EXPECT_EQ( 1.0f, (dstLeaf+1)->getValue(ijk1)); + auto *dstLower = dstGrid->tree().getFirstNode<1>(); + EXPECT_EQ(1u, (dstLower+0)->getChildMask().countOn()); + EXPECT_EQ(1u, (dstLower+1)->getChildMask().countOn()); + EXPECT_EQ(-1.0f, (dstLower+0)->getValue(ijk2)); + EXPECT_EQ( 1.0f, (dstLower+1)->getValue(ijk1)); + auto *dstUpper = dstGrid->tree().getFirstNode<2>(); + EXPECT_EQ(1u, (dstUpper+0)->getChildMask().countOn()); + EXPECT_EQ(1u, (dstUpper+1)->getChildMask().countOn()); + EXPECT_EQ(-1.0f, (dstUpper+0)->getValue(ijk2)); + EXPECT_EQ( 1.0f, (dstUpper+1)->getValue(ijk1)); + + EXPECT_EQ(-1.0f, dstGrid->tree().getValue(ijk2)); + EXPECT_EQ( 1.0f, dstGrid->tree().getValue(ijk1)); + auto dstAcc = dstGrid->getAccessor(); - EXPECT_EQ( 1.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); - EXPECT_EQ(-1.0f, dstAcc.getValue(nanovdb::Coord(2, -2, 9))); + EXPECT_EQ(-1.0f, dstAcc.getValue(ijk2)); + EXPECT_EQ( 1.0f, dstAcc.getValue(ijk1)); - const nanovdb::BBox indexBBox = dstGrid->indexBBox(); + const nanovdb::BBox indexBBox = dstGrid->indexBBox(); EXPECT_DOUBLE_EQ( 1.0, indexBBox[0][0]); EXPECT_DOUBLE_EQ(-2.0, indexBBox[0][1]); EXPECT_DOUBLE_EQ( 3.0, indexBBox[0][2]); @@ -2668,7 +3085,7 @@ TEST_F(TestNanoVDB, GridBuilderBasic2) EXPECT_DOUBLE_EQ(10.0, indexBBox[1][2]); EXPECT_EQ(nanovdb::Coord(1, -2, 3), dstGrid->indexBBox()[0]); - 
EXPECT_EQ(nanovdb::Coord(2, 2, 9), dstGrid->indexBBox()[1]); + EXPECT_EQ(nanovdb::Coord(2, 2, 9), dstGrid->indexBBox()[1]); EXPECT_EQ(dstGrid->tree().root().minimum(),-1.0f); EXPECT_EQ(dstGrid->tree().root().maximum(), 1.0f); @@ -2681,18 +3098,18 @@ TEST_F(TestNanoVDB, GridBuilderBasic2) TEST_F(TestNanoVDB, GridBuilderPrune) { { - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); - const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(8*16-1)); - auto func = [](const nanovdb::Coord&) { return 1.0f; }; - //auto func = [](const nanovdb::Coord&, float &v) { v = 1.0f; return true; }; - builder(func, bbox); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f, "test"); + auto srcAcc = srcGrid.getAccessor(); + const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(8*16-1)); + auto func = [](const nanovdb::Coord&) { return 1.0f; }; + srcGrid(func, bbox); + for (auto ijk = bbox.begin(); ijk; ++ijk) { EXPECT_EQ(1.0f, srcAcc.getValue(*ijk)); EXPECT_TRUE(srcAcc.isActive(*ijk)); } - - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2719,7 +3136,7 @@ TEST_F(TestNanoVDB, GridBuilderPrune) } EXPECT_EQ( 0.0f, dstAcc.getValue(nanovdb::Coord(2, -2, 9))); - const nanovdb::BBox indexBBox = dstGrid->indexBBox(); + const nanovdb::BBox indexBBox = dstGrid->indexBBox(); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][0]); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][1]); EXPECT_DOUBLE_EQ( 0.0, indexBBox[0][2]); @@ -2747,8 +3164,9 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) using VoxelT = nanovdb::Vec3f; EXPECT_EQ(nanovdb::AlignUp(12 + 3 + 1 + 2*4 + 64 + 3*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder builder(nanovdb::Vec3f(0.0f)); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(VoxelT(0.0f)); + auto srcAcc = 
srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), nanovdb::Vec3f(1.0f)); srcAcc.setValue(nanovdb::Coord(-10, 20,-50), nanovdb::Vec3f(2.0f)); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), nanovdb::Vec3f(3.0f)); @@ -2758,9 +3176,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) EXPECT_EQ(nanovdb::Vec3f(2.0f), srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(nanovdb::Vec3f(3.0f), srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2789,7 +3205,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec3f) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -2813,8 +3229,9 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) using VoxelT = nanovdb::Vec4f; EXPECT_EQ(nanovdb::AlignUp(12 + 3 + 1 + 2*4 + 64 + 4*(2*4 + 512*4)), sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder builder(nanovdb::Vec4f(0.0f)); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(VoxelT(0.0f)); + auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), nanovdb::Vec4f(1.0f)); srcAcc.setValue(nanovdb::Coord(-10, 20,-50), nanovdb::Vec4f(2.0f)); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), nanovdb::Vec4f(3.0f)); @@ -2824,9 +3241,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) EXPECT_EQ(nanovdb::Vec4f(2.0f), srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(nanovdb::Vec4f(3.0f), srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - auto handle = 
builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2855,7 +3270,7 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -2880,8 +3295,9 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) using VoxelT = nanovdb::Fp4; EXPECT_EQ(96u + 512u/2, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); srcAcc.setValue(nanovdb::Coord(-10, 20,-50), 2.0f); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), 3.0f); @@ -2891,9 +3307,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -2908,10 +3322,13 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_TRUE(dstGrid); EXPECT_EQ("", std::string(dstGrid->gridName())); EXPECT_EQ((const char*)handle.data(), (const char*)dstGrid); + EXPECT_TRUE(dstGrid->isBreadthFirst()); + EXPECT_EQ(1.0f, dstGrid->tree().getValue(nanovdb::Coord( 1, 2, 3))); + EXPECT_EQ(2.0f, dstGrid->tree().getValue(nanovdb::Coord(-10, 20,-50))); + EXPECT_EQ(3.0f, dstGrid->tree().getValue(nanovdb::Coord( 50,-12, 30))); EXPECT_EQ(1.0f, 
dstGrid->tree().root().minimum()); EXPECT_EQ(3.0f, dstGrid->tree().root().maximum()); EXPECT_EQ(2.0f, dstGrid->tree().root().average()); - EXPECT_TRUE(dstGrid->isBreadthFirst()); using GridT = std::remove_pointer::type; EXPECT_TRUE(dstGrid->isSequential()); EXPECT_TRUE(dstGrid->isSequential()); @@ -2920,7 +3337,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -2966,22 +3383,18 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_EQ(dstGrid->tree().nodeCount(2), n[0]); } {// Sphere - const double voxelSize = 0.1, halfWidth = 3.0; - const float radius = 10.0f; - const nanovdb::Vec3f center(0); - const nanovdb::Vec3d origin(0); + const double voxelSize = 0.1, halfWidth = 3.0, radius = 10.0f; + const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.5f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, - voxelSize, halfWidth, - origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default, - tolerance, - false); + auto handle = nanovdb::createLevelSetSphere(radius, center, + voxelSize, halfWidth, + origin, "sphere", + nanovdb::StatsMode::Default, + nanovdb::ChecksumMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); + Sphere sphere(center, radius, voxelSize, halfWidth); auto kernel = [&](const nanovdb::CoordBBox& bbox) { auto nanoAcc = nanoGrid->getAccessor(); for (auto it = bbox.begin(); it; ++it) { @@ -3005,8 +3418,10 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) using VoxelT = nanovdb::Fp8; EXPECT_EQ(96u + 512u, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder 
builder(0.0f); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto srcAcc = srcGrid.getAccessor(); + srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); srcAcc.setValue(nanovdb::Coord(-10, 20,-50), 2.0f); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), 3.0f); @@ -3016,9 +3431,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3045,7 +3458,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -3091,19 +3504,15 @@ TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_EQ(dstGrid->tree().nodeCount(2), n[0]); } {// Sphere - const double voxelSize = 0.1, halfWidth = 3.0; - const float radius = 10.0f; - const nanovdb::Vec3f center(0); - const nanovdb::Vec3d origin(0); + const double voxelSize = 0.1, halfWidth = 3.0, radius = 10.0f; + const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.05f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, - voxelSize, halfWidth, - origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default, - tolerance, - false); + auto handle = nanovdb::createLevelSetSphere(radius, center, + voxelSize, halfWidth, + origin, "sphere", + nanovdb::StatsMode::Default, + nanovdb::ChecksumMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); 
Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); @@ -3130,8 +3539,9 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) using VoxelT = nanovdb::Fp16; EXPECT_EQ(96u + 512u*2, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); srcAcc.setValue(nanovdb::Coord(-10, 20,-50), 2.0f); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), 3.0f); @@ -3141,9 +3551,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3170,7 +3578,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -3216,19 +3624,15 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_EQ(dstGrid->tree().nodeCount(2), n[0]); } {// Sphere - const double voxelSize = 0.1, halfWidth = 3.0; - const float radius = 10.0f; - const nanovdb::Vec3f center(0); - const nanovdb::Vec3d origin(0); + const double voxelSize = 0.1, halfWidth = 3.0, radius = 10.0f; + const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.005f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, - voxelSize, halfWidth, - origin, "sphere", - nanovdb::StatsMode::Default, - 
nanovdb::ChecksumMode::Default, - tolerance, - false); + auto handle = nanovdb::createLevelSetSphere(radius, center, + voxelSize, halfWidth, + origin, "sphere", + nanovdb::StatsMode::Default, + nanovdb::ChecksumMode::Default); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); @@ -3255,17 +3659,15 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic1) using VoxelT = nanovdb::FpN; EXPECT_EQ(96u, sizeof(nanovdb::NanoLeaf)); { // 1 grid point - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 0, 0, 0), 1.0f); EXPECT_TRUE(srcAcc.isActive(nanovdb::Coord(0, 0, 0))); EXPECT_TRUE(srcAcc.isValueOn(nanovdb::Coord(0, 0, 0))); EXPECT_EQ(1.0f, srcAcc.getValue(nanovdb::Coord( 0, 0, 0))); - builder.setStats(nanovdb::StatsMode::All); - //builder.setVerbose(true); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3293,7 +3695,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic1) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_FALSE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(0, 0, 0)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -3317,8 +3719,9 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) using VoxelT = nanovdb::FpN; EXPECT_EQ(96u, sizeof(nanovdb::NanoLeaf)); { // 3 grid point - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto srcAcc = srcGrid.getAccessor(); srcAcc.setValue(nanovdb::Coord( 1, 2, 3), 1.0f); 
srcAcc.setValue(nanovdb::Coord(-10, 20,-50), 2.0f); srcAcc.setValue(nanovdb::Coord( 50,-12, 30), 3.0f); @@ -3328,10 +3731,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) EXPECT_EQ(2.0f, srcAcc.getValue(nanovdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(nanovdb::Coord( 50,-12, 30))); - builder.setStats(nanovdb::StatsMode::All); - //builder.setVerbose(true); - auto handle = builder.getHandle(); - + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3358,7 +3758,7 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_FALSE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -3410,19 +3810,17 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Sphere) using VoxelT = nanovdb::FpN; EXPECT_EQ(96u, sizeof(nanovdb::NanoLeaf)); {// Sphere - const double voxelSize = 0.1, halfWidth = 3.0; - const float radius = 10.0f; - const nanovdb::Vec3f center(0); - const nanovdb::Vec3d origin(0); + const double voxelSize = 0.1, halfWidth = 3.0, radius = 10.0f; + const nanovdb::Vec3d center(0), origin(0); const float tolerance = 0.5f * voxelSize; - auto handle = nanovdb::createLevelSetSphere(radius, center, - voxelSize, halfWidth, - origin, "sphere", - nanovdb::StatsMode::Default, - nanovdb::ChecksumMode::Default, - tolerance, - false); + auto handle = nanovdb::createLevelSetSphere(radius, center, + voxelSize, halfWidth, + origin, "sphere", + nanovdb::StatsMode::Default, + nanovdb::ChecksumMode::Default, + tolerance, + false); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); Sphere sphere(center, radius, float(voxelSize), float(halfWidth)); @@ -3447,12 +3845,12 @@ TEST_F(TestNanoVDB, 
GridBuilder_FpN_Sphere) TEST_F(TestNanoVDB, NodeManager) { { // 1 active voxel - nanovdb::GridBuilder builder(0.0f); - auto srcAcc = builder.getAccessor(); - builder.setGridClass(nanovdb::GridClass::LevelSet); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); + auto srcAcc = srcGrid.getAccessor(); const nanovdb::Coord x0(1, 2, 3), x1(1, 2, 4); srcAcc.setValue(x1, 1.0f); - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3519,12 +3917,13 @@ TEST_F(TestNanoVDB, NodeManager) EXPECT_EQ(dstGrid->tree().nodeCount(2), n[0]); } { // 2 active voxels - nanovdb::GridBuilder builder(0.0f, nanovdb::GridClass::LevelSet); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); + auto srcAcc = srcGrid.getAccessor(); const nanovdb::Coord x0(1, 2, 3), x1(2,-2, 9), x2(1, 2, 4); srcAcc.setValue(x1, 1.0f); srcAcc.setValue(x2, 2.0f); - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3599,12 +3998,13 @@ TEST_F(TestNanoVDB, NodeManager) } } EXPECT_EQ(voxelCount, voxels.size()); - nanovdb::GridBuilder builder(-1.0f, nanovdb::GridClass::LevelSet); - auto srcAcc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(-1.0f, "test", nanovdb::GridClass::LevelSet); + auto srcAcc = srcGrid.getAccessor(); for (size_t i=0; i(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -3648,17 +4048,17 @@ TEST_F(TestNanoVDB, NodeManager) TEST_F(TestNanoVDB, GridBuilderBasicDense) { { // dense functor - nanovdb::GridBuilder builder(0.0f, 
nanovdb::GridClass::LevelSet); - auto srcAcc = builder.getAccessor(); - const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); - auto func = [](const nanovdb::Coord&) { return 1.0f; }; - //auto func = [](const nanovdb::Coord&, float &v) { v = 1.0f; return true; }; - builder(func, bbox); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f, "test", nanovdb::GridClass::LevelSet); + const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); + auto func = [](const nanovdb::Coord&) { return 1.0f; }; + srcGrid(func, bbox); + auto srcAcc = srcGrid.getAccessor(); for (auto ijk = bbox.begin(); ijk; ++ijk) { EXPECT_EQ(1.0f, srcAcc.getValue(*ijk)); EXPECT_TRUE(srcAcc.isActive(*ijk)); } - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -3698,8 +4098,9 @@ TEST_F(TestNanoVDB, GridBuilderBasicDense) TEST_F(TestNanoVDB, GridBuilderBackground) { { - nanovdb::GridBuilder builder(0.5f); - auto acc = builder.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.5f); + auto acc = srcGrid.getAccessor(); acc.setValue(nanovdb::Coord(1), 1); acc.setValue(nanovdb::Coord(2), 0); @@ -3710,8 +4111,7 @@ TEST_F(TestNanoVDB, GridBuilderBackground) EXPECT_TRUE(acc.isActive(nanovdb::Coord(1))); EXPECT_EQ(0, acc.getValue(nanovdb::Coord(2))); EXPECT_TRUE(acc.isActive(nanovdb::Coord(1))); - - auto gridHdl = builder.getHandle<>(); + auto gridHdl = nanovdb::createNanoGrid(srcGrid); auto grid = gridHdl.grid(); EXPECT_TRUE(grid); EXPECT_FALSE(grid->isEmpty()); @@ -3723,7 +4123,8 @@ TEST_F(TestNanoVDB, GridBuilderBackground) TEST_F(TestNanoVDB, GridBuilderSphere) { - Sphere sphere(nanovdb::Vec3(50), 20.0f); + using SrcGridT = nanovdb::build::Grid; + Sphere sphere(nanovdb::Vec3d(50), 20.0f); EXPECT_EQ(3.0f, sphere.background()); EXPECT_EQ(3.0f, sphere(nanovdb::Coord(100))); EXPECT_EQ(-3.0f, 
sphere(nanovdb::Coord(50))); @@ -3731,16 +4132,12 @@ TEST_F(TestNanoVDB, GridBuilderSphere) EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(50, 50, 69))); EXPECT_EQ(2.0f, sphere(nanovdb::Coord(50, 50, 72))); - nanovdb::GridBuilder builder(sphere.background(), nanovdb::GridClass::LevelSet); - auto srcAcc = builder.getAccessor(); - + SrcGridT srcGrid(sphere.background(), "test", nanovdb::GridClass::LevelSet); const nanovdb::CoordBBox bbox(nanovdb::Coord(-100), nanovdb::Coord(100)); //mTimer.start("GridBulder Sphere"); - builder(sphere, bbox); + srcGrid(sphere, bbox); //mTimer.stop(); - - - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* meta = handle.gridMetaData(); @@ -3770,6 +4167,7 @@ TEST_F(TestNanoVDB, GridBuilderSphere) uint64_t count = 0; auto dstAcc = dstGrid->getAccessor(); + auto srcAcc = srcGrid.getAccessor(); for (nanovdb::Coord ijk = bbox[0]; ijk[0] <= bbox[1][0]; ++ijk[0]) { for (ijk[1] = bbox[0][1]; ijk[1] <= bbox[1][1]; ++ijk[1]) { for (ijk[2] = bbox[0][2]; ijk[2] <= bbox[1][2]; ++ijk[2]) { @@ -3787,29 +4185,32 @@ TEST_F(TestNanoVDB, GridBuilderSphere) TEST_F(TestNanoVDB, createLevelSetSphere) { - Sphere sphere(nanovdb::Vec3(50), 20.0f); - EXPECT_EQ(3.0f, sphere.background()); - EXPECT_EQ(3.0f, sphere(nanovdb::Coord(100))); - EXPECT_EQ(-3.0f, sphere(nanovdb::Coord(50))); - EXPECT_EQ(0.0f, sphere(nanovdb::Coord(50, 50, 70))); - EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(50, 50, 69))); - EXPECT_EQ(2.0f, sphere(nanovdb::Coord(50, 50, 72))); - - auto handle = nanovdb::createLevelSetSphere(20.0f, nanovdb::Vec3f(50), - 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20"); - - const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); + const int radius = 100, center = 50, width = 3, voxelSize = 1; + const std::string gridName("sphere_" + std::to_string(radius)); + Sphere sphere(nanovdb::Vec3d(center), radius); + EXPECT_EQ( 3.0f, 
sphere.background()); + EXPECT_EQ( 3.0f, sphere(nanovdb::Coord(center+2*radius))); + EXPECT_EQ(-3.0f, sphere(nanovdb::Coord(center))); + EXPECT_EQ( 0.0f, sphere(nanovdb::Coord(center, center, center+radius))); + EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(center, center, center+radius-1))); + EXPECT_EQ( 2.0f, sphere(nanovdb::Coord(center, center, center+radius+2))); + //mTimer.start("createLevelSetSphere"); + auto handle = nanovdb::createLevelSetSphere(radius, nanovdb::Vec3d(center), + voxelSize, width, nanovdb::Vec3d(0), gridName); + //mTimer.stop(); + const nanovdb::CoordBBox bbox(nanovdb::Coord(center-radius-width-1), + nanovdb::Coord(center+radius+width+1)); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); - EXPECT_EQ("sphere_20", std::string(meta->shortGridName())); + EXPECT_EQ(gridName, std::string(meta->shortGridName())); EXPECT_EQ(nanovdb::GridType::Float, meta->gridType()); EXPECT_EQ(nanovdb::GridClass::LevelSet, meta->gridClass()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); - EXPECT_EQ("sphere_20", std::string(dstGrid->gridName())); + EXPECT_EQ(gridName, std::string(dstGrid->gridName())); EXPECT_TRUE(dstGrid->hasBBox()); EXPECT_TRUE(dstGrid->hasMinMax()); @@ -3818,38 +4219,34 @@ TEST_F(TestNanoVDB, createLevelSetSphere) EXPECT_NEAR( -3.0f, dstGrid->tree().root().minimum(), 0.04f); EXPECT_NEAR( 3.0f, dstGrid->tree().root().maximum(), 0.04f); - EXPECT_NEAR( 0.0f, dstGrid->tree().root().average(), 0.3f); + EXPECT_NEAR( 0.0f, dstGrid->tree().root().average(), 0.30f); //std::cerr << dstGrid->tree().root().minimum() << std::endl; //std::cerr << dstGrid->tree().root().maximum() << std::endl; //std::cerr << dstGrid->tree().root().average() << std::endl; //std::cerr << dstGrid->tree().root().stdDeviation() << std::endl; - - EXPECT_EQ(nanovdb::Coord(50 - 20 - 2), dstGrid->indexBBox()[0]); - EXPECT_EQ(nanovdb::Coord(50 + 20 + 2), dstGrid->indexBBox()[1]); + EXPECT_EQ(nanovdb::Coord(center - radius - 
2), dstGrid->indexBBox()[0]); + EXPECT_EQ(nanovdb::Coord(center + radius + 2), dstGrid->indexBBox()[1]); //std::cerr << "bbox.min = (" << dstGrid->indexBBox()[0][0] << ", " << dstGrid->indexBBox()[0][1] << ", " << dstGrid->indexBBox()[0][2] << ")" << std::endl; //std::cerr << "bbox.max = (" << dstGrid->indexBBox()[1][0] << ", " << dstGrid->indexBBox()[1][1] << ", " << dstGrid->indexBBox()[1][2] << ")" << std::endl; - uint64_t count = 0; - auto dstAcc = dstGrid->getAccessor(); - for (nanovdb::Coord ijk = bbox[0]; ijk[0] <= bbox[1][0]; ++ijk[0]) { - for (ijk[1] = bbox[0][1]; ijk[1] <= bbox[1][1]; ++ijk[1]) { - for (ijk[2] = bbox[0][2]; ijk[2] <= bbox[1][2]; ++ijk[2]) { - if (sphere.inNarrowBand(ijk)) - ++count; - EXPECT_EQ(sphere(ijk), dstAcc.getValue(ijk)); - EXPECT_EQ(sphere.inNarrowBand(ijk), dstAcc.isActive(ijk)); - } + std::atomic count{0}; + nanovdb::forEach(bbox, [&](const nanovdb::CoordBBox &b){ + auto dstAcc = dstGrid->getAccessor(); + for (auto it = b.begin(); it; ++it) { + const nanovdb::Coord ijk = *it; + if (sphere.inNarrowBand(ijk)) ++count; + EXPECT_EQ(sphere(ijk), dstAcc.getValue(ijk)); + EXPECT_EQ(sphere.inNarrowBand(ijk), dstAcc.isActive(ijk)); } - } - + }); EXPECT_EQ(count, dstGrid->activeVoxelCount()); } // createLevelSetSphere TEST_F(TestNanoVDB, createFogVolumeSphere) { - auto handle = nanovdb::createFogVolumeSphere(20.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createFogVolumeSphere(20.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "sphere_20"); const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(100)); @@ -3883,7 +4280,7 @@ TEST_F(TestNanoVDB, createFogVolumeSphere) EXPECT_EQ(nanovdb::Coord(50 - 20), dstGrid->indexBBox()[0]); EXPECT_EQ(nanovdb::Coord(50 + 20), dstGrid->indexBBox()[1]); - Sphere sphere(nanovdb::Vec3(50), 20.0f); + Sphere sphere(nanovdb::Vec3d(50), 20.0f); uint64_t count = 0; auto dstAcc = dstGrid->getAccessor(); for (nanovdb::Coord ijk = bbox[0]; ijk[0] <= bbox[1][0]; ++ijk[0]) { @@ -3908,7 +4305,7 @@ 
TEST_F(TestNanoVDB, createFogVolumeSphere) TEST_F(TestNanoVDB, createPointSphere) { - Sphere sphere(nanovdb::Vec3(0), 100.0f, 1.0f, 1.0f); + Sphere sphere(nanovdb::Vec3d(0), 100.0, 1.0, 1.0); EXPECT_EQ(1.0f, sphere.background()); EXPECT_EQ(1.0f, sphere(nanovdb::Coord(101, 0, 0))); EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(0))); @@ -3916,12 +4313,12 @@ TEST_F(TestNanoVDB, createPointSphere) EXPECT_EQ(-1.0f, sphere(nanovdb::Coord(0, 0, 99))); EXPECT_EQ(1.0f, sphere(nanovdb::Coord(0, 0, 101))); - auto handle = nanovdb::createPointSphere(1,// pointer per voxel - 100.0f,// radius of sphere - nanovdb::Vec3f(0),// center sphere - 1.0,// voxel size - nanovdb::Vec3d(0),// origin of grid - "point_sphere"); + auto handle = nanovdb::createPointSphere(1,// pointer per voxel + 100.0,// radius of sphere + nanovdb::Vec3d(0),// center sphere + 1.0,// voxel size + nanovdb::Vec3d(0),// origin of grid + "point_sphere"); const nanovdb::CoordBBox bbox(nanovdb::Coord(-100), nanovdb::Coord(100)); @@ -3939,16 +4336,16 @@ TEST_F(TestNanoVDB, createPointSphere) EXPECT_TRUE(dstGrid->hasMinMax()); EXPECT_FALSE(dstGrid->hasAverage()); EXPECT_FALSE(dstGrid->hasStdDeviation()); + EXPECT_EQ(dstGrid->voxelSize()[0], 1.0); + //std::cerr << "BBox = " << dstGrid->indexBBox() << std::endl; EXPECT_EQ(bbox[0], dstGrid->indexBBox()[0]); EXPECT_EQ(bbox[1], dstGrid->indexBBox()[1]); - //std::cerr << "bbox.min = (" << dstGrid->indexBBox()[0][0] << ", " << dstGrid->indexBBox()[0][1] << ", " << dstGrid->indexBBox()[0][2] << ")" << std::endl; - //std::cerr << "bbox.max = (" << dstGrid->indexBBox()[1][0] << ", " << dstGrid->indexBBox()[1][1] << ", " << dstGrid->indexBBox()[1][2] << ")" << std::endl; - - uint64_t count = 0; + uint64_t count = 0; nanovdb::PointAccessor acc(*dstGrid); - const nanovdb::Vec3f * begin = nullptr, *end = nullptr; + EXPECT_TRUE(acc); + const nanovdb::Vec3f *begin = nullptr, *end = nullptr; for (nanovdb::Coord ijk = bbox[0]; ijk[0] <= bbox[1][0]; ++ijk[0]) { for (ijk[1] = bbox[0][1]; ijk[1] 
<= bbox[1][1]; ++ijk[1]) { for (ijk[2] = bbox[0][2]; ijk[2] <= bbox[1][2]; ++ijk[2]) { @@ -3956,12 +4353,19 @@ TEST_F(TestNanoVDB, createPointSphere) ++count; EXPECT_TRUE(acc.isActive(ijk)); EXPECT_TRUE(acc.getValue(ijk) != std::numeric_limits::max()); - EXPECT_EQ(1u, acc.voxelPoints(ijk, begin, end)); // exactly one point per voxel - const nanovdb::Vec3f p = *begin + ijk.asVec3s();// local voxel coordinate + global index coordinates + const auto n = acc.voxelPoints(ijk, begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + EXPECT_EQ(1u, n); // exactly one point per voxel + const nanovdb::Vec3f p = *begin;// + ijk.asVec3s();// local voxel coordinate + global index coordinates EXPECT_TRUE(nanovdb::Abs(sphere(p)) <= 1.0f); } else { EXPECT_FALSE(acc.isActive(ijk)); EXPECT_TRUE(acc.getValue(ijk) < 512 || acc.getValue(ijk) == std::numeric_limits::max()); + EXPECT_EQ(0u, acc.voxelPoints(ijk, begin, end)); + EXPECT_FALSE(begin); + EXPECT_FALSE(end); } } } @@ -3971,7 +4375,7 @@ TEST_F(TestNanoVDB, createPointSphere) TEST_F(TestNanoVDB, createLevelSetTorus) { - auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "torus_100"); EXPECT_TRUE(handle); @@ -4008,7 +4412,7 @@ TEST_F(TestNanoVDB, createLevelSetTorus) TEST_F(TestNanoVDB, createFogVolumeTorus) { - auto handle = nanovdb::createFogVolumeTorus(100.0f, 50.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createFogVolumeTorus(100.0f, 50.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "torus_100"); EXPECT_TRUE(handle); @@ -4049,7 +4453,7 @@ TEST_F(TestNanoVDB, createFogVolumeTorus) TEST_F(TestNanoVDB, createLevelSetBox) { - auto handle = nanovdb::createLevelSetBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createLevelSetBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "box"); EXPECT_TRUE(handle); EXPECT_EQ(1u, 
handle.gridCount()); @@ -4085,7 +4489,7 @@ TEST_F(TestNanoVDB, createLevelSetBox) TEST_F(TestNanoVDB, createFogVolumeBox) { - auto handle = nanovdb::createFogVolumeBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createFogVolumeBox(40.0f, 60.0f, 80.0f, nanovdb::Vec3d(50), 1.0, 3.0, nanovdb::Vec3d(0), "box"); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4121,7 +4525,7 @@ TEST_F(TestNanoVDB, createFogVolumeBox) TEST_F(TestNanoVDB, createLevelSetOctahedron) { - auto handle = nanovdb::createLevelSetOctahedron(100.0f, nanovdb::Vec3f(50), + auto handle = nanovdb::createLevelSetOctahedron(100.0f, nanovdb::Vec3d(50), 1.0f, 3.0f, nanovdb::Vec3d(0), "octahedron"); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4162,7 +4566,7 @@ TEST_F(TestNanoVDB, CNanoVDBSize) EXPECT_EQ(sizeof(cnanovdb_mask3), sizeof(nanovdb::Mask<3>)); EXPECT_EQ(sizeof(cnanovdb_mask4), sizeof(nanovdb::Mask<4>)); EXPECT_EQ(sizeof(cnanovdb_mask5), sizeof(nanovdb::Mask<5>)); - EXPECT_EQ(sizeof(cnanovdb_map), sizeof(nanovdb::Map)); + EXPECT_EQ(sizeof(cnanovdb_map), sizeof(nanovdb::Map)); EXPECT_EQ(sizeof(cnanovdb_coord), sizeof(nanovdb::Coord)); EXPECT_EQ(sizeof(cnanovdb_Vec3F), sizeof(nanovdb::Vec3f)); @@ -4210,6 +4614,10 @@ TEST_F(TestNanoVDB, PNanoVDB_Basic) EXPECT_EQ((int)nanovdb::GridType::Vec4f, PNANOVDB_GRID_TYPE_VEC4F); EXPECT_EQ((int)nanovdb::GridType::Vec4d, PNANOVDB_GRID_TYPE_VEC4D); EXPECT_EQ((int)nanovdb::GridType::Index, PNANOVDB_GRID_TYPE_INDEX); + EXPECT_EQ((int)nanovdb::GridType::OnIndex, PNANOVDB_GRID_TYPE_ONINDEX); + EXPECT_EQ((int)nanovdb::GridType::IndexMask, PNANOVDB_GRID_TYPE_INDEXMASK); + EXPECT_EQ((int)nanovdb::GridType::OnIndexMask, PNANOVDB_GRID_TYPE_ONINDEXMASK); + EXPECT_EQ((int)nanovdb::GridType::PointIndex, PNANOVDB_GRID_TYPE_POINTINDEX); EXPECT_EQ((int)nanovdb::GridType::End, PNANOVDB_GRID_TYPE_END); EXPECT_EQ((int)nanovdb::GridClass::Unknown, PNANOVDB_GRID_CLASS_UNKNOWN); @@ -4250,96 +4658,129 @@ TEST_F(TestNanoVDB, 
PNanoVDB_Basic) EXPECT_EQ(NANOVDB_OFFSETOF(pnanovdb_map_t, taperd), PNANOVDB_MAP_OFF_TAPERD); EXPECT_TRUE(validate_strides(printf));// checks strides and prints out new ones if they have changed -} +}// PNanoVDB_Basic template void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodedata0_t = typename nanovdb::LeafData; - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMinimum), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMaximum), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mAverage), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mStdDevi), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mValues), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_table); + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMinimum), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMaximum), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mAverage), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mStdDevi), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mValues), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_table); } template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodedata0_t = typename nanovdb::LeafData; - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mDev), 
(int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); } template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodedata0_t = typename nanovdb::LeafData; - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); } template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodedata0_t = typename nanovdb::LeafData; - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); - 
EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); } template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodedata0_t = typename nanovdb::LeafData; - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); - EXPECT_EQ(NANOVDB_OFFSETOF(nodedata0_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMin), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_min); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMax), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_max); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mAvg), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_ave); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mDev), (int)pnanovdb_grid_type_constants[grid_type].leaf_off_stddev); } // template specializations for bool types template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodeLeaf_t = typename nanovdb::LeafData; using leaf_t = typename nanovdb::LeafNode; - EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxMin), PNANOVDB_LEAF_OFF_BBOX_MIN); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, 
mBBoxDif), PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mValueMask), PNANOVDB_LEAF_OFF_VALUE_MASK); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mValues), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPadding), PNANOVDB_LEAF_OFF_VALUE_MASK + 2*64); } // template specializations for nanovdb::ValueMask types template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodeLeaf_t = typename nanovdb::LeafData; using leaf_t = typename nanovdb::LeafNode; - EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxMin), PNANOVDB_LEAF_OFF_BBOX_MIN); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxDif), PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mValueMask), PNANOVDB_LEAF_OFF_VALUE_MASK); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPadding), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); } // template specializations for nanovdb::ValueIndex types template <> void validateLeaf(pnanovdb_grid_type_t grid_type) { - using nodeLeaf_t = typename nanovdb::LeafData; using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPrefixSum), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); +} +// template specializations for nanovdb::ValueIndexMask types +template <> +void validateLeaf(pnanovdb_grid_type_t grid_type) +{ + using leaf_t = typename nanovdb::LeafNode; EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxMin), PNANOVDB_LEAF_OFF_BBOX_MIN); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxDif), PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mValueMask), PNANOVDB_LEAF_OFF_VALUE_MASK); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), 
PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPrefixSum), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMask), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8 + 8); +} + +// template specializations for nanovdb::ValueOnIndex types +template <> +void validateLeaf(pnanovdb_grid_type_t grid_type) +{ + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPrefixSum), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); + EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); +} + +// template specializations for nanovdb::ValueOnIndexMask types +template <> +void validateLeaf(pnanovdb_grid_type_t grid_type) +{ + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPrefixSum), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMask), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8 + 8); +} + +// template specializations for nanovdb::Points types +template <> +void validateLeaf(pnanovdb_grid_type_t grid_type) +{ + using leaf_t = typename nanovdb::LeafNode; + EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPointCount), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); + EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mValues), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8 + 8); } TYPED_TEST(TestOffsets, PNanoVDB) @@ -4366,6 +4807,12 @@ TYPED_TEST(TestOffsets, PNanoVDB) grid_type = PNANOVDB_GRID_TYPE_MASK; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_INDEX; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_ONINDEX; + } else if 
(std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_INDEXMASK; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_ONINDEXMASK; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_BOOLEAN; } else if (std::is_same::value) { @@ -4376,10 +4823,15 @@ TYPED_TEST(TestOffsets, PNanoVDB) grid_type = PNANOVDB_GRID_TYPE_FP16; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_FPN; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_POINTINDEX; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_VEC3U8; + } else if (std::is_same::value) { + grid_type = PNANOVDB_GRID_TYPE_VEC3U16; } else { - EXPECT_TRUE(false); + EXPECT_TRUE(!"your forgot to add a grid_type to TestOffsets::PNanoVDB!"); } - static const uint32_t rootLevel = 3u; using nodeLeaf_t = typename nanovdb::LeafData; using leaf_t = typename nanovdb::LeafNode; using nodeLower_t = typename nanovdb::InternalData; @@ -4390,7 +4842,7 @@ TYPED_TEST(TestOffsets, PNanoVDB) using root_t = typename nanovdb::RootNode; using rootdata_tile_t = typename nanovdb::RootData::Tile; using root_tile_t = typename nanovdb::RootNode::Tile; - using treedata_t = typename nanovdb::TreeData; + using treedata_t = nanovdb::TreeData; using tree_t = typename nanovdb::Tree; // grid @@ -4426,9 +4878,9 @@ TYPED_TEST(TestOffsets, PNanoVDB) // test GridBlindMetaData EXPECT_EQ((int)sizeof(nanovdb::GridBlindMetaData), PNANOVDB_GRIDBLINDMETADATA_SIZE); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mByteOffset), PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mElementCount), PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT); - EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mFlags), PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataOffset), PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueCount), 
PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT); + EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mValueSize), PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mSemantic), PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataClass), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS); EXPECT_EQ(NANOVDB_OFFSETOF(nanovdb::GridBlindMetaData, mDataType), PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE); @@ -4543,25 +4995,26 @@ TYPED_TEST(TestOffsets, PNanoVDB) EXPECT_EQ(8u*sizeof(nodeLower_t::mStdDevi), pnanovdb_grid_type_stat_strides_bits[grid_type]); // leaf nodes - EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxMin), PNANOVDB_LEAF_OFF_BBOX_MIN); - EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxDif), PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS); + // The following data members exist in all flavors of the leaf nodes so we test them first + EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxMin), PNANOVDB_LEAF_OFF_BBOX_MIN); + EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mBBoxDif), PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS); EXPECT_EQ(NANOVDB_OFFSETOF(nodeLeaf_t, mValueMask), PNANOVDB_LEAF_OFF_VALUE_MASK); validateLeaf(grid_type); }// PNanoVDB -#endif +#endif // DISABLE_PNANOVDB TEST_F(TestNanoVDB, GridStats) { using GridT = nanovdb::NanoGrid; - Sphere sphere(nanovdb::Vec3(50), 50.0f); - nanovdb::GridBuilder builder(sphere.background(), nanovdb::GridClass::LevelSet); + Sphere sphere(nanovdb::Vec3d(50), 50.0f); + nanovdb::build::Grid grid(sphere.background(), "test", nanovdb::GridClass::LevelSet); const nanovdb::CoordBBox bbox(nanovdb::Coord(-100), nanovdb::Coord(100)); //mTimer.start("GridBuilder"); - builder(sphere, bbox); + grid(sphere, bbox); //mTimer.stop(); - auto handle1 = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); - auto handle2 = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + nanovdb::CreateNanoGrid> 
converter(grid); + auto handle1 = converter.getHandle(); + auto handle2 = converter.getHandle(); EXPECT_TRUE(handle1); EXPECT_TRUE(handle2); GridT* grid1 = handle1.grid(); @@ -4579,12 +5032,10 @@ TEST_F(TestNanoVDB, GridStats) auto nodeMgrHandle2 = nanovdb::createNodeManager(*grid2); auto *mgr2 = nodeMgrHandle2.mgr(); EXPECT_TRUE(mgr2); - //nanovdb::NodeManager mgr1(*grid1); - //nanovdb::NodeManager mgr2(*grid2); { // reset stats in grid2 //grid2->tree().data()->mVoxelCount = uint64_t(0); - grid2->data()->mWorldBBox = nanovdb::BBox(); + grid2->data()->mWorldBBox = nanovdb::BBox(); grid2->tree().root().data()->mBBox = nanovdb::BBox(); for (uint32_t i = 0; i < grid2->tree().nodeCount(0); ++i) { auto& leaf = mgr2->leaf(i); @@ -4691,11 +5142,12 @@ TEST_F(TestNanoVDB, ScalarSampleFromVoxels) auto trilinearIndex = [&](const nanovdb::Coord& ijk) -> float { return 0.34f + 1.6f * dx * ijk[0] + 6.7f * dx * ijk[1] - 3.5f * dx * ijk[2]; // index coordinates }; - - nanovdb::GridBuilder builder(1.0f); - const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(128)); - builder(trilinearIndex, bbox); - auto handle = builder.getHandle<>(dx); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(1.0f); + srcGrid.setTransform(dx); + const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(128)); + srcGrid(trilinearIndex, bbox); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -4748,11 +5200,12 @@ TEST_F(TestNanoVDB, VectorSampleFromVoxels) auto trilinearIndex = [&](const nanovdb::Coord& ijk) -> nanovdb::Vec3f { return nanovdb::Vec3f(0.34f, 1.6f * dx * ijk[0] + 6.7f * dx * ijk[1], -3.5f * dx * ijk[2]); // index coordinates }; - - nanovdb::GridBuilder builder(nanovdb::Vec3f(1.0f)); - const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(128)); - builder(trilinearIndex, bbox); - auto handle = builder.getHandle<>(dx); + using SrcGridT = nanovdb::build::Grid; + SrcGridT 
srcGrid(nanovdb::Vec3f(1.0f)); + const nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(128)); + srcGrid(trilinearIndex, bbox); + srcGrid.setTransform(dx); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -4796,16 +5249,16 @@ TEST_F(TestNanoVDB, GridChecksum) EXPECT_EQ(nanovdb::ChecksumMode::Default, nanovdb::ChecksumMode::Partial); EXPECT_NE(nanovdb::ChecksumMode::Default, nanovdb::ChecksumMode::Full); - nanovdb::CpuTimer<> timer; + nanovdb::CpuTimer timer; //timer.start("nanovdb::createLevelSetSphere"); - auto handle = nanovdb::createLevelSetSphere(100.0f, - nanovdb::Vec3f(50), - 1.0, - 3.0, - nanovdb::Vec3d(0), - "sphere_20", - nanovdb::StatsMode::Disable, - nanovdb::ChecksumMode::Disable); + auto handle = nanovdb::createLevelSetSphere(100.0f, + nanovdb::Vec3d(50), + 1.0, + 3.0, + nanovdb::Vec3d(0), + "sphere_20", + nanovdb::StatsMode::Disable, + nanovdb::ChecksumMode::Disable); //timer.stop(); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4856,15 +5309,15 @@ TEST_F(TestNanoVDB, GridChecksum) TEST_F(TestNanoVDB, GridValidator) { - nanovdb::CpuTimer<> timer; + nanovdb::CpuTimer timer; //timer.start("nanovdb::createLevelSetSphere"); - auto handle = nanovdb::createLevelSetSphere(100.0f, - nanovdb::Vec3f(50), - 1.0, 3.0, - nanovdb::Vec3d(0), - "sphere_20", - nanovdb::StatsMode::All, - nanovdb::ChecksumMode::Full); + auto handle = nanovdb::createLevelSetSphere(100.0f, + nanovdb::Vec3d(50), + 1.0, 3.0, + nanovdb::Vec3d(0), + "sphere_20", + nanovdb::StatsMode::All, + nanovdb::ChecksumMode::Full); //timer.stop(); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); @@ -4909,10 +5362,10 @@ TEST_F(TestNanoVDB, RandomReadAccessor) const int voxelCount = 512, min = -10000, max = 10000; std::srand(98765); auto op = [&](){return rand() % (max - min) + min;}; - + using SrcGridT = nanovdb::build::Grid; for (int i=0; i<10; ++i) { - nanovdb::GridBuilder 
builder(background); - auto acc = builder.getAccessor(); + SrcGridT srcGrid(background); + auto acc = srcGrid.getAccessor(); std::vector voxels(voxelCount); for (int j=0; j(); + auto gridHdl = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(gridHdl); EXPECT_EQ(1u, gridHdl.gridCount()); auto grid = gridHdl.grid(); @@ -4970,17 +5423,18 @@ TEST_F(TestNanoVDB, RandomReadAccessor) TEST_F(TestNanoVDB, StandardDeviation) { - nanovdb::GridBuilder builder(0.5f); + using OpT = nanovdb::GetNodeInfo; + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.5f); { - auto acc = builder.getAccessor(); + auto acc = srcGrid.getAccessor(); acc.setValue(nanovdb::Coord(-1), 1.0f); acc.setValue(nanovdb::Coord(0), 2.0f); acc.setValue(nanovdb::Coord(1), 3.0f); acc.setValue(nanovdb::Coord(2), 0.0f); } - - auto gridHdl = builder.getHandle<>(); + auto gridHdl = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(gridHdl); auto grid = gridHdl.grid(); EXPECT_TRUE(grid); @@ -4992,26 +5446,49 @@ TEST_F(TestNanoVDB, StandardDeviation) EXPECT_EQ( 2.0f, acc.getValue(nanovdb::Coord( 0)) ); EXPECT_EQ( 3.0f, acc.getValue(nanovdb::Coord( 1)) ); EXPECT_EQ( 0.0f, acc.getValue(nanovdb::Coord( 2)) ); +#if 0 auto nodeInfo = acc.getNodeInfo(nanovdb::Coord(-1)); EXPECT_EQ(nodeInfo.mAverage, 1.f); EXPECT_EQ(nodeInfo.mLevel, 0u); EXPECT_EQ(nodeInfo.mDim, 8u); + { + auto nodeInfo = acc.getNodeInfo(nanovdb::Coord(1)); + EXPECT_EQ(nodeInfo.mAverage, (2.0f + 3.0f) / 3.0f); + auto getStdDev = [&](int n, float a, float b, float c) { + float m = (a + b + c) / n; + float sd = sqrtf(((a - m) * (a - m) + + (b - m) * (b - m) + + (c - m) * (c - m)) / + n); + return sd; + }; + EXPECT_NEAR(nodeInfo.mStdDevi, getStdDev(3.0f, 2.0f, 3.0f, 0), 1e-5); + EXPECT_EQ(nodeInfo.mLevel, 0u); + EXPECT_EQ(nodeInfo.mDim, 8u); + } +#else + auto nodeInfo = acc.get(nanovdb::Coord(-1)); + EXPECT_EQ(nodeInfo.average, 1.f); + EXPECT_EQ(nodeInfo.level, 0u); + EXPECT_EQ(nodeInfo.dim, 8u); + { + auto nodeInfo = acc.get(nanovdb::Coord(1)); + 
EXPECT_EQ(nodeInfo.average, (2.0f + 3.0f) / 3.0f); + auto getStdDev = [&](int n, float a, float b, float c) { + float m = (a + b + c) / n; + float sd = sqrtf(((a - m) * (a - m) + + (b - m) * (b - m) + + (c - m) * (c - m)) / + n); + return sd; + }; + EXPECT_NEAR(nodeInfo.stdDevi, getStdDev(3.0f, 2.0f, 3.0f, 0), 1e-5); + EXPECT_EQ(nodeInfo.level, 0u); + EXPECT_EQ(nodeInfo.dim, 8u); + } +#endif } - { - auto nodeInfo = acc.getNodeInfo(nanovdb::Coord(1)); - EXPECT_EQ(nodeInfo.mAverage, (2.0f + 3.0f) / 3.0f); - auto getStdDev = [&](int n, float a, float b, float c) { - float m = (a + b + c) / n; - float sd = sqrtf(((a - m) * (a - m) + - (b - m) * (b - m) + - (c - m) * (c - m)) / - n); - return sd; - }; - EXPECT_NEAR(nodeInfo.mStdDevi, getStdDev(3.0f, 2.0f, 3.0f, 0), 1e-5); - EXPECT_EQ(nodeInfo.mLevel, 0u); - EXPECT_EQ(nodeInfo.mDim, 8u); - } + } // ReadAccessor TEST_F(TestNanoVDB, BoxStencil) @@ -5019,12 +5496,13 @@ TEST_F(TestNanoVDB, BoxStencil) const float a = 0.54f, b[3]={0.12f, 0.78f,-0.34f}; const nanovdb::Coord min(-17, -10, -8), max(10, 21, 13); const nanovdb::CoordBBox bbox(min, max), bbox2(min, max.offsetBy(-1)); - nanovdb::GridBuilder builder(0.0f); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); auto func = [&](const nanovdb::Coord &ijk) { return a + b[0]*ijk[0] + b[1]*ijk[1] + b[2]*ijk[2]; }; - builder(func, bbox); - auto handle = builder.getHandle(); + srcGrid(func, bbox); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* grid = handle.grid(); @@ -5053,8 +5531,8 @@ TEST_F(TestNanoVDB, CurvatureStencil) { {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f; - const nanovdb::Vec3f center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space - auto handle = nanovdb::createLevelSetSphere(radius, + const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. 
(12,16,20) in index space + auto handle = nanovdb::createLevelSetSphere(radius, center, 0.5, // dx 20.0); // half-width so dense inside @@ -5117,9 +5595,9 @@ TEST_F(TestNanoVDB, CurvatureStencil) const int dim = 256; // sparse level set sphere - nanovdb::Vec3f C(0.35f, 0.35f, 0.35f); + nanovdb::Vec3d C(0.35f, 0.35f, 0.35f); double r = 0.15, voxelSize = 1.0/(dim-1); - auto handle = nanovdb::createLevelSetSphere(float(r), C, voxelSize); + auto handle = nanovdb::createLevelSetSphere(r, C, voxelSize); EXPECT_TRUE(handle); EXPECT_EQ(1u, handle.gridCount()); auto* sphere = handle.grid(); @@ -5193,7 +5671,7 @@ TEST_F(TestNanoVDB, GradStencil) { {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f;// 20 voxels - const nanovdb::Vec3f center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space + const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space auto handle = nanovdb::createLevelSetSphere(radius, center, 0.5, // dx @@ -5241,7 +5719,7 @@ TEST_F(TestNanoVDB, WenoStencil) { {// test of level set to sphere at (6,8,10) with R=10 and dx=0.5 const float radius = 10.0f;// 20 voxels - const nanovdb::Vec3f center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space + const nanovdb::Vec3d center(6.0, 8.0, 10.0);//i.e. (12,16,20) in index space auto handle = nanovdb::createLevelSetSphere(radius, center, 0.5, // dx @@ -5287,11 +5765,13 @@ TEST_F(TestNanoVDB, WenoStencil) TEST_F(TestNanoVDB, StencilIntersection) { + using SrcGridT = nanovdb::build::Grid; const nanovdb::Coord ijk(1,4,-9); - nanovdb::GridBuilder builder(0.0f); - auto acc = builder.getAccessor(); + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); acc.setValue(ijk,-1.0f); int cases = 0; + for (int mx=0; mx<2; ++mx) { acc.setValue(ijk.offsetBy(-1,0,0), mx ? 1.0f : -1.0f); for (int px=0; px<2; ++px) { @@ -5305,7 +5785,7 @@ TEST_F(TestNanoVDB, StencilIntersection) for (int pz=0; pz<2; ++pz) { acc.setValue(ijk.offsetBy(0,0,1), pz ? 
1.0f : -1.0f); ++cases; - auto handle = builder.getHandle<>(); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto grid = handle.grid(); EXPECT_TRUE(grid); @@ -5337,41 +5817,39 @@ TEST_F(TestNanoVDB, MultiFile) { std::vector> handles; { // add an int32_t grid - nanovdb::GridBuilder builder(-1); - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(-1, "Int32 grid"); + auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(-256), 10); - handles.push_back(builder.getHandle(1.0, nanovdb::Vec3d(0), "Int32 grid")); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // add an empty int32_t grid - nanovdb::GridBuilder builder(-4); - handles.push_back(builder.getHandle(1.0, nanovdb::Vec3d(0), "Int32 grid, empty")); + nanovdb::build::Grid grid(-4, "Int32 grid, empty"); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // add a Vec3f grid - nanovdb::GridBuilder builder(nanovdb::Vec3f(0.0f, 0.0f, -1.0f)); - builder.setGridClass(nanovdb::GridClass::Staggered); - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(nanovdb::Vec3f(0.0f, 0.0f, -1.0f),"Float vector grid",nanovdb::GridClass::Staggered); + auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(-256), nanovdb::Vec3f(1.0f, 0.0f, 0.0f)); - handles.push_back(builder.getHandle(1.0, nanovdb::Vec3d(0), "Float vector grid")); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // add an int64_t grid - nanovdb::GridBuilder builder(0); - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(0, "Int64 grid"); + auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(0), 10); - handles.push_back(builder.getHandle(1.0, nanovdb::Vec3d(0), "Int64 grid")); + handles.push_back(nanovdb::createNanoGrid(grid)); } for (int i = 0; i < 10; ++i) { const float radius = 100.0f; const float voxelSize = 1.0f, width = 3.0f; - const nanovdb::Vec3f center(i * 10.0f, 0.0f, 0.0f); + const nanovdb::Vec3d center(i * 10.0f, 0.0f, 0.0f); 
handles.push_back(nanovdb::createLevelSetSphere(radius, center, voxelSize, width, nanovdb::Vec3d(0), "Level set sphere at (" + std::to_string(i * 10) + ",0,0)")); } { // add a double grid - nanovdb::GridBuilder builder(0.0); - builder.setGridClass(nanovdb::GridClass::FogVolume); - auto acc = builder.getAccessor(); + nanovdb::build::Grid grid(0.0, "Double grid", nanovdb::GridClass::FogVolume); + auto acc = grid.getAccessor(); acc.setValue(nanovdb::Coord(6000), 1.0); - handles.push_back(builder.getHandle(1.0, nanovdb::Vec3d(0), "Double grid")); + handles.push_back(nanovdb::createNanoGrid(grid)); } #if defined(NANOVDB_USE_BLOSC) nanovdb::io::writeGrids("data/multi1.nvdb", handles, nanovdb::io::Codec::BLOSC); @@ -5547,8 +6025,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "spheref")); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "sphered")); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref")); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered")); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -5586,8 +6064,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... 
- gridHdls.push_back(nanovdb::createLevelSetSphere(100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3R(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere(100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -5667,8 +6145,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)), std::runtime_error); - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); } {// zero internal memory size ASSERT_THROW(nanovdb::HostBuffer::createPool(0), std::runtime_error); 
@@ -5689,8 +6167,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ -5745,8 +6223,8 @@ TEST_F(TestNanoVDB, HostBuffer) std::vector > gridHdls; // create two grids... 
- ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)), std::runtime_error); - ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); + ASSERT_THROW(gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)), std::runtime_error); EXPECT_FALSE(pool.isManaged()); pool.resizePool(1<<26);// resize to 64 MB @@ -5767,8 +6245,8 @@ TEST_F(TestNanoVDB, HostBuffer) EXPECT_FALSE(buffer.isFull()); EXPECT_TRUE(buffer.isManaged()); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0f, nanovdb::Vec3f(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); - gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, -1.0f, false, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d(-20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "spheref", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); + gridHdls.push_back(nanovdb::createLevelSetSphere( 100.0, nanovdb::Vec3d( 20, 0, 0), 1.0, 3.0, nanovdb::Vec3d(0), "sphered", nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Partial, pool)); EXPECT_TRUE(gridHdls[0]); auto* meta0 = gridHdls[0].gridMetaData(); @@ 
-5841,9 +6319,9 @@ TEST_F(TestNanoVDB, NodeIterators) const double voxelSize = 0.1; const float radius = 10.0f; const float halfWidth = 3.0f; - const nanovdb::Vec3f center(0); + const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -5936,15 +6414,33 @@ TEST_F(TestNanoVDB, NodeIterators) } } -// make testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*IndexGridBuilder*" --gtest_break_on_failure --gtest_repeat=5 -TEST_F(TestNanoVDB, IndexGridBuilder1) +// make testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*BasicValueIndexStats*" --gtest_break_on_failure --gtest_repeat=5 +TEST_F(TestNanoVDB, BasicValueIndexStats) { + { + using ValueIndexT = typename nanovdb::NanoLeaf::DataType; + using ValueIndexMaskT = typename nanovdb::NanoLeaf::DataType; + using ValueOnIndexT = typename nanovdb::NanoLeaf::DataType; + using ValueOnIndexMaskT = typename nanovdb::NanoLeaf::DataType; + const size_t size1 = sizeof(ValueOnIndexT), + size2 = sizeof(ValueOnIndexMaskT), + size3 = sizeof(ValueIndexT), + size4 = sizeof(ValueIndexMaskT); + EXPECT_EQ(size1, ValueOnIndexT::memUsage()); + EXPECT_EQ(size2, ValueOnIndexMaskT::memUsage()); + EXPECT_EQ(size3, ValueIndexT::memUsage()); + EXPECT_EQ(size4, ValueIndexMaskT::memUsage()); + EXPECT_EQ(64u, size2 - size1);// 512 bits = 64 bytes + EXPECT_EQ(64u, size4 - size3);// 512 bits = 64 bytes + } EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - nanovdb::GridBuilder builder1(0.0f); - auto acc = builder1.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - auto handle1 = builder1.getHandle(); + + auto handle1 = nanovdb::createNanoGrid(srcGrid); 
auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -5960,11 +6456,16 @@ TEST_F(TestNanoVDB, IndexGridBuilder1) EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); EXPECT_EQ(0.0f, fltGrid->tree().getValue(nanovdb::Coord(0,0,0))); - nanovdb::IndexGridBuilder builder2(*fltGrid); - auto handle2 = builder2.getHandle(); + auto handle2 = nanovdb::createNanoGrid(*fltGrid, 1u, true, true); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); - + EXPECT_EQ(1u, idxGrid->blindDataCount()); + //std::cerr << "meta name = " << idxGrid->blindMetaData(0).mName << std::endl; + EXPECT_EQ(-1, idxGrid->findBlindData("channel_")); + EXPECT_EQ(-1, idxGrid->findBlindData("channel_0 ")); + EXPECT_EQ(-1, idxGrid->findBlindData(" channel_0")); + EXPECT_EQ( 0, idxGrid->findBlindData("channel_0")); + EXPECT_EQ(std::string("channel_0"), std::string(idxGrid->blindMetaData(0).mName)); EXPECT_EQ(1u, idxGrid->tree().nodeCount(2)); EXPECT_EQ(1u, idxGrid->tree().nodeCount(1)); EXPECT_EQ(1u, idxGrid->tree().nodeCount(0)); @@ -5974,21 +6475,21 @@ TEST_F(TestNanoVDB, IndexGridBuilder1) EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), idxGrid->voxelSize()); EXPECT_EQ(1u, idxGrid->tree().root().tileCount()); EXPECT_EQ(1u, idxGrid->activeVoxelCount()); - EXPECT_EQ(5u+4u+32*32*32u-1 + 4u+16*16*16u-1 + 4u+8*8*8u, idxGrid->valueCount()); + EXPECT_EQ(5u + 4u+32*32*32u-1u + 4u+16*16*16u-1u + 4u+8*8*8u, idxGrid->valueCount()); EXPECT_EQ(0u, idxGrid->tree().root().background()); EXPECT_EQ(1u, idxGrid->tree().root().minimum()); EXPECT_EQ(2u, idxGrid->tree().root().maximum()); EXPECT_EQ(3u, idxGrid->tree().root().average()); EXPECT_EQ(4u, idxGrid->tree().root().stdDeviation()); - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); + //EXPECT_EQ(idxGrid->valueCount(), converter2.valueCount()); EXPECT_FALSE(idxGrid->tree().isActive(nanovdb::Coord(0,0,0))); EXPECT_TRUE(idxGrid->tree().isActive(ijk)); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u, idxGrid->tree().getValue(nanovdb::Coord(0,0,0))); - 
EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u+1u, idxGrid->tree().getValue(nanovdb::Coord(0,0,1))); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u+7u, idxGrid->tree().getValue(nanovdb::Coord(0,0,7))); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u+8*8*8-1u, idxGrid->tree().getValue(nanovdb::Coord(7,7,7))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 0u, idxGrid->tree().getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 1u, idxGrid->tree().getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 7u, idxGrid->tree().getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 8*8*8-1u, idxGrid->tree().getValue(nanovdb::Coord(7,7,7))); EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); @@ -6001,32 +6502,231 @@ TEST_F(TestNanoVDB, IndexGridBuilder1) EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<2>()->origin()); auto idxAcc = idxGrid->getAccessor(); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u, idxAcc.getValue(nanovdb::Coord(0,0,0))); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u+1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); - EXPECT_EQ(5u+4u+32*32*32-1+4u+16*16*16-1+4u+7u, idxAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 0u, idxAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 7u, idxAcc.getValue(nanovdb::Coord(0,0,7))); EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<0>()->origin()); EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); - auto buffer = builder2.getValues(1);// only allocate one channel - //std::cerr << "Value buffer count: " << (buffer.size()>>10) << "KB" << std::endl; - float *values = reinterpret_cast(buffer.data()); - // compare the values of the functor with 
the original fltGrid + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + EXPECT_EQ(values[0], srcGrid.tree().root().background()); for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { - //std::cerr << "Grid" << *iter << " = " << getValue(*iter) << std::endl; + //std::cerr << "Grid" << *iter << " = " << fltGrid->tree().getValue(*iter) << std::endl; EXPECT_EQ(values[idxAcc.getValue(*iter)], fltGrid->tree().getValue(*iter)); } +}// BasicValueIndexStats -}// IndexGridBuilder1 +// make testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*BasicValueIndexStats*" --gtest_break_on_failure --gtest_repeat=5 +TEST_F(TestNanoVDB, BasicValueIndexStats2) +{ + EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); + const nanovdb::Coord ijk(0,0,1); + acc.setValue(ijk, 1.0f); -TEST_F(TestNanoVDB, SparseIndexGridBuilder1) + auto handle2 = nanovdb::createNanoGrid(srcGrid, 1u, true, true); + auto *idxGrid = handle2.grid(); + EXPECT_TRUE(idxGrid); + + EXPECT_EQ(1u, idxGrid->tree().nodeCount(2)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, idxGrid->gridCount()); + EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), idxGrid->voxelSize()); + EXPECT_EQ(1u, idxGrid->tree().root().tileCount()); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(5u + 4u+32*32*32u-1u + 4u+16*16*16u-1u + 4u+8*8*8u, idxGrid->valueCount()); + EXPECT_EQ(0u, idxGrid->tree().root().background()); + EXPECT_EQ(1u, idxGrid->tree().root().minimum()); + EXPECT_EQ(2u, idxGrid->tree().root().maximum()); + EXPECT_EQ(3u, idxGrid->tree().root().average()); + EXPECT_EQ(4u, idxGrid->tree().root().stdDeviation()); + + EXPECT_FALSE(idxGrid->tree().isActive(nanovdb::Coord(0,0,0))); + EXPECT_TRUE(idxGrid->tree().isActive(ijk)); + + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 0u, idxGrid->tree().getValue(nanovdb::Coord(0,0,0))); 
+ EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 1u, idxGrid->tree().getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 7u, idxGrid->tree().getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(5u + 4u+32*32*32-1 + 4u+16*16*16-1 + 8*8*8-1u, idxGrid->tree().getValue(nanovdb::Coord(7,7,7))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + + //auto fltAcc = fltGrid->getAccessor(); + EXPECT_EQ(0.0f, acc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1.0f, acc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(0.0f, acc.getValue(nanovdb::Coord(0,0,7))); + //EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<0>()->origin()); + //EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<1>()->origin()); + //EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<2>()->origin()); + + auto idxAcc = idxGrid->getAccessor(); + const uint64_t count = 5u + 4u+32*32*32-1 + 4u+16*16*16-1; + EXPECT_EQ(count + 0u, idxAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(count + 1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(count + 7u, idxAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); + + auto *leaf = idxAcc.probeLeaf(nanovdb::Coord(0,0,-1)); + EXPECT_FALSE(leaf); + leaf = idxAcc.probeLeaf(nanovdb::Coord(0,0,1)); + EXPECT_TRUE(leaf); + EXPECT_EQ(count + 512u, leaf->minimum()); + EXPECT_EQ(count + 513u, leaf->maximum()); + EXPECT_EQ(count + 514u, leaf->average()); + EXPECT_EQ(count + 515u, leaf->stdDeviation()); + + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { + EXPECT_EQ(values[idxAcc.getValue(*iter)], acc.getValue(*iter)); + } + +}// BasicValueIndexStats2 + +TEST_F(TestNanoVDB, ValueMask2ValueIndex) +{ + using SrcGridT = 
nanovdb::build::Grid; + SrcGridT srcGrid(true); + auto acc = srcGrid.getAccessor(); + const nanovdb::Coord ijk(0,0,1); + acc.setValue(ijk, true); + auto handle = nanovdb::createNanoGrid(srcGrid, 0u, false, false);// no stats or tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 512u, idxGrid->valueCount());// background and 512 leaf values +}// ValueMask2ValueIndex + +TEST_F(TestNanoVDB, ValueMask2ValueOnIndex) +{ + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(true); + auto acc = srcGrid.getAccessor(); + const nanovdb::Coord ijk(0,0,1); + acc.setValue(ijk, true); + auto handle = nanovdb::createNanoGrid(srcGrid, 0u, true, false);// stats but no tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 4u + 1u, idxGrid->valueCount());// background, stats, and one active value + + auto idxAcc = idxGrid->getAccessor(); + const uint64_t count = 1u;// background + EXPECT_EQ(0u, idxAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(0u, idxAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); + + auto *leaf = idxAcc.probeLeaf(nanovdb::Coord(0,0,-1)); + EXPECT_FALSE(leaf); + leaf = idxAcc.probeLeaf(nanovdb::Coord(0,0,1)); + EXPECT_TRUE(leaf); + EXPECT_EQ(count + 1u, leaf->minimum()); + EXPECT_EQ(count + 2u, leaf->maximum()); + EXPECT_EQ(count + 3u, leaf->average()); + EXPECT_EQ(count + 4u, leaf->stdDeviation()); +}// ValueMask2ValueOnIndex + +TEST_F(TestNanoVDB, BasicValueIndexNoStats) +{ + EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); + const nanovdb::Coord ijk(0,0,1); + 
acc.setValue(ijk, 1.0f); + nanovdb::CreateNanoGrid converter(srcGrid); + auto handle1 = converter.getHandle(); + auto *fltGrid = handle1.grid(); + EXPECT_TRUE(fltGrid); + + EXPECT_EQ(1u, fltGrid->tree().nodeCount(2)); + EXPECT_EQ(1u, fltGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, fltGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, fltGrid->gridCount()); + EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), fltGrid->voxelSize()); + EXPECT_EQ(1u, fltGrid->tree().root().tileCount()); + EXPECT_EQ(1u, fltGrid->activeVoxelCount()); + EXPECT_FALSE(fltGrid->tree().isActive(nanovdb::Coord(0,0,0))); + EXPECT_TRUE(fltGrid->tree().isActive(ijk)); + EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); + EXPECT_EQ(0.0f, fltGrid->tree().getValue(nanovdb::Coord(0,0,0))); + + auto handle2 = converter.getHandle(1, false, true); + auto *idxGrid = handle2.grid(); + EXPECT_TRUE(idxGrid); + + EXPECT_EQ(1u, idxGrid->tree().nodeCount(2)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, idxGrid->gridCount()); + EXPECT_EQ(fltGrid->worldBBox(), idxGrid->worldBBox()); + EXPECT_EQ(fltGrid->indexBBox(), idxGrid->indexBBox()); + EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), idxGrid->voxelSize()); + EXPECT_EQ(1u, idxGrid->tree().root().tileCount()); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 32*32*32u-1u + 16*16*16u-1u + 8*8*8u, idxGrid->valueCount()); + EXPECT_EQ(0u, idxGrid->tree().root().background()); + EXPECT_EQ(0u, idxGrid->tree().root().minimum()); + EXPECT_EQ(0u, idxGrid->tree().root().maximum()); + EXPECT_EQ(0u, idxGrid->tree().root().average()); + EXPECT_EQ(0u, idxGrid->tree().root().stdDeviation()); + + EXPECT_FALSE(idxGrid->tree().isActive(nanovdb::Coord(0,0,0))); + EXPECT_TRUE(idxGrid->tree().isActive(ijk)); + + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 0u, idxGrid->tree().getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 1u, idxGrid->tree().getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(1u + 32*32*32-1 + 
16*16*16-1 + 7u, idxGrid->tree().getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 8*8*8-1u, idxGrid->tree().getValue(nanovdb::Coord(7,7,7))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + + auto fltAcc = fltGrid->getAccessor(); + EXPECT_EQ(0.0f, fltAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1.0f, fltAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(0.0f, fltAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<2>()->origin()); + + auto idxAcc = idxGrid->getAccessor(); + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 0u, idxAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(1u + 32*32*32-1 + 16*16*16-1 + 7u, idxAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); + + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + EXPECT_EQ(values[0], srcGrid.tree().root().background()); + for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { + //std::cerr << "Grid" << *iter << " = " << fltGrid->tree().getValue(*iter) << std::endl; + EXPECT_EQ(values[idxAcc.getValue(*iter)], fltGrid->tree().getValue(*iter)); + } +}// BasicValueIndexNoStats + +TEST_F(TestNanoVDB, BasicValueIndexNoStatsNoTiles) { EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); - nanovdb::GridBuilder builder1(0.0f); - auto acc = builder1.getAccessor(); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); const nanovdb::Coord ijk(0,0,1); acc.setValue(ijk, 1.0f); - auto handle1 = 
builder1.getHandle(); + nanovdb::CreateNanoGrid converter(srcGrid); + + auto handle1 = converter.getHandle(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6042,11 +6742,95 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder1) EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); EXPECT_EQ(0.0f, fltGrid->tree().getValue(nanovdb::Coord(0,0,0))); - nanovdb::IndexGridBuilder builder2(*fltGrid, false, false);// no stats and no inactive values - auto handle2 = builder2.getHandle(); + auto handle2 = converter.getHandle(1u, false, false); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(2)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, idxGrid->gridCount()); + EXPECT_EQ(fltGrid->worldBBox(), idxGrid->worldBBox()); + EXPECT_EQ(fltGrid->indexBBox(), idxGrid->indexBBox()); + EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), idxGrid->voxelSize()); + EXPECT_EQ(1u, idxGrid->tree().root().tileCount()); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 8*8*8u, idxGrid->valueCount()); + EXPECT_EQ(0u, idxGrid->tree().root().background()); + EXPECT_EQ(0u, idxGrid->tree().root().minimum()); + EXPECT_EQ(0u, idxGrid->tree().root().maximum()); + EXPECT_EQ(0u, idxGrid->tree().root().average()); + EXPECT_EQ(0u, idxGrid->tree().root().stdDeviation()); + + EXPECT_FALSE(idxGrid->tree().isActive(nanovdb::Coord(0,0,0))); + EXPECT_TRUE(idxGrid->tree().isActive(ijk)); + + EXPECT_EQ(1u + 0u, idxGrid->tree().getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1u + 1u, idxGrid->tree().getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(1u + 7u, idxGrid->tree().getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(1u + 8*8*8-1u, idxGrid->tree().getValue(nanovdb::Coord(7,7,7))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + EXPECT_EQ(0u, idxGrid->tree().getValue(nanovdb::Coord(-1,0,0))); + + auto fltAcc = fltGrid->getAccessor(); + EXPECT_EQ(0.0f, 
fltAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1.0f, fltAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(0.0f, fltAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), fltAcc.getNode<2>()->origin()); + + auto idxAcc = idxGrid->getAccessor(); + EXPECT_EQ(1u + 0u, idxAcc.getValue(nanovdb::Coord(0,0,0))); + EXPECT_EQ(1u + 1u, idxAcc.getValue(nanovdb::Coord(0,0,1))); + EXPECT_EQ(1u + 7u, idxAcc.getValue(nanovdb::Coord(0,0,7))); + EXPECT_EQ(1u + 8*8*8-1u, idxAcc.getValue(nanovdb::Coord(7,7,7))); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<0>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); + EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); + + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + EXPECT_EQ(values[0], srcGrid.tree().root().background()); + for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { + //std::cerr << "Grid" << *iter << " = " << fltGrid->tree().getValue(*iter) << std::endl; + if (auto *leaf = idxAcc.probeLeaf(*iter)) { + EXPECT_FALSE(leaf->data()->hasStats()); + EXPECT_EQ(512u, leaf->data()->valueCount());// ValueIndex produces dense leaf nodes + EXPECT_EQ(values[idxAcc.getValue(*iter)], fltGrid->tree().getValue(*iter)); + } + } +}// BasicValueIndexNoStatsNoTiles + +TEST_F(TestNanoVDB, SparseIndexGridBuilder1) +{ + EXPECT_TRUE(nanovdb::Version() >= nanovdb::Version(32,3,4)); + using SrcGridT = nanovdb::build::Grid; + SrcGridT srcGrid(0.0f); + auto acc = srcGrid.getAccessor(); + const nanovdb::Coord ijk(0,0,1); + acc.setValue(ijk, 1.0f); + nanovdb::CreateNanoGrid converter(srcGrid); + auto handle1 = converter.getHandle(); + auto *fltGrid = handle1.grid(); + EXPECT_TRUE(fltGrid); + + EXPECT_EQ(1u, fltGrid->tree().nodeCount(2)); + EXPECT_EQ(1u, fltGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, fltGrid->tree().nodeCount(0)); + 
EXPECT_EQ(1u, fltGrid->gridCount()); + EXPECT_EQ(nanovdb::Vec3d(1.0,1.0,1.0), fltGrid->voxelSize()); + EXPECT_EQ(1u, fltGrid->tree().root().tileCount()); + EXPECT_EQ(1u, fltGrid->activeVoxelCount()); + EXPECT_FALSE(fltGrid->tree().isActive(nanovdb::Coord(0,0,0))); + EXPECT_TRUE(fltGrid->tree().isActive(ijk)); + EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); + EXPECT_EQ(0.0f, fltGrid->tree().getValue(nanovdb::Coord(0,0,0))); + + auto handle2 = converter.getHandle(1u, false, true);// no stats and include active tile values + auto *idxGrid = handle2.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->tree().nodeCount(2)); EXPECT_EQ(1u, idxGrid->tree().nodeCount(1)); EXPECT_EQ(1u, idxGrid->tree().nodeCount(0)); @@ -6062,7 +6846,6 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder1) EXPECT_EQ(0u, idxGrid->tree().root().average()); EXPECT_EQ(0u, idxGrid->tree().root().stdDeviation()); EXPECT_EQ(2u, idxGrid->valueCount());// background + ijk(0,0,1) - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); EXPECT_FALSE(idxGrid->tree().isActive(nanovdb::Coord(0,0,0))); EXPECT_TRUE(idxGrid->tree().isActive(ijk)); @@ -6090,12 +6873,10 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder1) EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<1>()->origin()); EXPECT_EQ(nanovdb::Coord(0), idxAcc.getNode<2>()->origin()); - auto buffer = builder2.getValues(1);// only allocate one channel - //std::cerr << "Value buffer size: " << buffer.size() << "bytes" << std::endl; - float *values = reinterpret_cast(buffer.data()); - // compare the values of the functor with the original fltGrid + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + EXPECT_EQ(values[0], srcGrid.tree().root().background()); for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { - //std::cerr << "Grid" << *iter << " = " << getValue(*iter) << std::endl; EXPECT_EQ(values[idxAcc.getValue(*iter)], fltGrid->tree().getValue(*iter)); } @@ -6108,9 +6889,9 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) const 
double voxelSize = 0.1; const float radius = 10.0f; const float halfWidth = 3.0f; - const nanovdb::Vec3f center(0); + const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6119,9 +6900,9 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::IndexGridBuilder builder2(*fltGrid); + nanovdb::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); - auto handle2 = builder2.getHandle(); + auto handle2 = builder2.getHandle(1u); //mTimer.stop(); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); @@ -6146,9 +6927,8 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) EXPECT_EQ(3u, idxGrid->tree().root().average()); EXPECT_EQ(4u, idxGrid->tree().root().stdDeviation()); - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); - //EXPECT_EQ(idxAcc.valueCount(), builder2.getValueCount()); + EXPECT_EQ(idxGrid->valueCount(), builder2.valueCount()); + //EXPECT_EQ(idxAcc.valueCount(), builder2.valueCount()); EXPECT_TRUE(idxGrid->valueCount()>0);// this is the number of values pointed to by the indexGrid for (auto iter = fltGrid->indexBBox().begin(); iter; ++iter) { @@ -6157,9 +6937,9 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) } {// allocate an external buffer and manually populate it with the floatGrid values - float *buffer = new float[builder2.getValueCount()];// this is the number of values pointed to by the indexGrid + float *buffer = new float[idxGrid->valueCount()];// this is the number of values pointed to by the indexGrid EXPECT_TRUE(buffer); - //std::cerr << "Buffer footprint: " << 
((4*builder2.getValueCount())>>20) << "MB" << std::endl; + //std::cerr << "Buffer footprint: " << ((4*builder2.valueCount())>>20) << "MB" << std::endl; buffer[0] = fltTree.background(); for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { const uint64_t idx = idxAcc.getValue(*iter); @@ -6168,6 +6948,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) } // compare the values of the functor with the original fltGrid nanovdb::ChannelAccessor acc(*idxGrid, buffer); + EXPECT_TRUE(acc); for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { EXPECT_EQ(buffer[idxAcc.getValue(*iter)], fltTree.getValue(*iter)); EXPECT_EQ(acc.getValue(*iter), fltTree.getValue(*iter)); @@ -6176,10 +6957,11 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) } {// allocate an external buffer and populate it with the floatGrid values - float *buffer = new float[builder2.getValueCount()];// this is the number of values pointed to by the indexGrid + float *buffer = new float[builder2.valueCount()];// this is the number of values pointed to by the indexGrid EXPECT_TRUE(buffer); //std::cerr << "Buffer footprint: " << ((4*idxGrid->valueCount())>>20) << "MB" << std::endl; - EXPECT_TRUE(builder2.copyValues(buffer, builder2.getValueCount())); + builder2.copyValues(buffer); + //EXPECT_TRUE(builder2.copyValues(buffer, builder2.valueCount())); EXPECT_EQ(buffer[idxRoot.minimum()], fltRoot.minimum()); EXPECT_EQ(buffer[idxRoot.maximum()], fltRoot.maximum()); @@ -6188,6 +6970,7 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) // compare the values of the functor with the original fltGrid nanovdb::ChannelAccessor acc(*idxGrid, buffer); + EXPECT_TRUE(acc); for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { EXPECT_EQ(buffer[idxAcc.getValue(*iter)], fltTree.getValue(*iter)); EXPECT_EQ(acc.getValue(*iter), fltTree.getValue(*iter)); @@ -6196,12 +6979,13 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) delete [] buffer; }// IndexGridBuilder2 - {// test the value buffer in IndexGridBuilder - //mTimer.start("Create value 
buffer"); - auto buffer = builder2.getValues(1);// only allocate one channel - //mTimer.stop(); - //std::cerr << "Value buffer footprint: " << (buffer.size()>>20) << "MB" << std::endl; - float *values = reinterpret_cast(buffer.data()); + {// test the value buffer in IndexGrid + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); + EXPECT_EQ(values[idxRoot.minimum()], fltRoot.minimum()); + EXPECT_EQ(values[idxRoot.maximum()], fltRoot.maximum()); + EXPECT_EQ(values[idxRoot.average()], fltRoot.average()); + EXPECT_EQ(values[idxRoot.stdDeviation()], fltRoot.stdDeviation()); //mTimer.start("Sequential test of value buffer"); // compare the values of the functor with the original fltGrid for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { @@ -6270,9 +7054,9 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) const double voxelSize = 0.1; const float radius = 10.0f; const float halfWidth = 3.0f; - const nanovdb::Vec3f center(0); + const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6281,11 +7065,11 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::IndexGridBuilder builder2(*fltGrid, false, false); + nanovdb::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); - auto handle2 = builder2.getHandle(); + auto handle2 = builder2.getHandle(1u, false, true); //mTimer.stop(); - auto *idxGrid = handle2.grid(); + auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); auto &idxTree = idxGrid->tree(); auto &idxRoot = idxTree.root(); @@ -6306,15 +7090,16 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) EXPECT_EQ(0u, idxRoot.maximum()); 
EXPECT_EQ(0u, idxRoot.average()); EXPECT_EQ(0u, idxRoot.stdDeviation()); - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); - //(idxAcc.valueCount(), builder2.getValueCount()); + EXPECT_EQ(idxGrid->valueCount(), builder2.valueCount()); + //(idxAcc.valueCount(), builder2.valueCount()); EXPECT_TRUE(idxGrid->valueCount()>0);// this is the number of values pointed to by the indexGrid for (auto it = fltGrid->indexBBox().begin(); it; ++it) EXPECT_EQ(fltTree.isActive(*it), idxTree.isActive(*it)); {// allocate an external buffer and manually populate it with the floatGrid values - float *buffer = new float[builder2.getValueCount()];// this is the number of values pointed to by the indexGrid + float *buffer = new float[builder2.valueCount()];// this is the number of values pointed to by the indexGrid EXPECT_TRUE(buffer); + buffer[0] = fltTree.background();// not required since we only check active values //std::cerr << "Value buffer footprint: " << ((4*idxRoot.maximum())>>20) << "MB" << std::endl; for (auto iter = idxGrid->indexBBox().begin(); iter; ++iter) { const uint64_t idx = idxAcc.getValue(*iter); @@ -6323,7 +7108,10 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) } // compare the values of the functor with the original fltGrid for (auto it = idxGrid->indexBBox().begin(); it; ++it) { - if (fltTree.isActive(*it)) EXPECT_EQ(buffer[idxAcc.getValue(*it)], fltTree.getValue(*it)); + EXPECT_LT(idxAcc.getValue(*it), idxGrid->valueCount()); + if (fltTree.isActive(*it)) { + EXPECT_EQ(buffer[idxAcc.getValue(*it)], fltTree.getValue(*it)); + } } delete [] buffer; } @@ -6332,7 +7120,7 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) float *buffer = new float[idxGrid->valueCount()];// this is the number of values pointed to by the indexGrid EXPECT_TRUE(buffer); //std::cerr << "Buffer footprint: " << ((4*idxGrid->valueCount())>>20) << "MB" << std::endl; - EXPECT_TRUE(builder2.copyValues(buffer, idxGrid->valueCount())); + builder2.copyValues(buffer); // compare the values of 
the functor with the original fltGrid for (auto it = idxGrid->indexBBox().begin(); it; ++it) { @@ -6341,13 +7129,9 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) delete [] buffer; } - {// test the value buffer in IndexGridBuilder - //mTimer.start("Create value buffer"); - auto buffer = builder2.getValues(1);// only allocate one channel - //mTimer.stop(); - EXPECT_EQ(sizeof(float)*builder2.getValueCount(), buffer.size()); - //std::cerr << "Value buffer footprint: " << (buffer.size()>>20) << "MB" << std::endl; - float *values = reinterpret_cast(buffer.data()); + {// test the value buffer in IndexGrid + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); //mTimer.start("Sequential test of value buffer"); // compare the values of the functor with the original fltGrid for (auto it = idxGrid->indexBBox().begin(); it; ++it) { @@ -6408,9 +7192,9 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) const double voxelSize = 0.1; const float radius = 10.0f; const float halfWidth = 3.0f; - const nanovdb::Vec3f center(0); + const nanovdb::Vec3d center(0); //mTimer.start("Create level set sphere"); - auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); + auto handle1 = nanovdb::createLevelSetSphere(radius, center, voxelSize, halfWidth); //mTimer.stop(); auto *fltGrid = handle1.grid(); EXPECT_TRUE(fltGrid); @@ -6419,9 +7203,9 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; // create an IndexGrid for the FloatGrid - nanovdb::IndexGridBuilder builder2(*fltGrid, false, false); + nanovdb::CreateNanoGrid builder2(*fltGrid); //mTimer.start("Create IndexGrid"); - auto handle2 = builder2.getHandle("IndexGrid_test", channels); + auto handle2 = builder2.getHandle(channels, false); //mTimer.stop(); auto *idxGrid = handle2.grid(); EXPECT_TRUE(idxGrid); @@ -6439,20 +7223,21 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) EXPECT_EQ(fltGrid->activeVoxelCount(), 
idxGrid->activeVoxelCount()); EXPECT_EQ(fltGrid->worldBBox(), idxGrid->worldBBox()); EXPECT_EQ(fltGrid->indexBBox(), idxGrid->indexBBox()); - EXPECT_EQ(idxGrid->valueCount(), builder2.getValueCount()); + EXPECT_EQ(idxGrid->valueCount(), builder2.valueCount()); EXPECT_EQ(channels, idxGrid->blindDataCount()); EXPECT_TRUE(idxGrid->valueCount()>0);// this is the number of values pointed to by the indexGrid auto *leaf = idxTree.getFirstNode<0>(); for (uint32_t i=0; ivalueCount(), idxGrid->blindMetaData(i).mElementCount); + EXPECT_EQ(idxGrid->valueCount(), idxGrid->blindMetaData(i).mValueCount); EXPECT_EQ(nanovdb::GridType::Float, idxGrid->blindMetaData(i).mDataType); EXPECT_EQ(nanovdb::GridBlindDataClass::ChannelArray, idxGrid->blindMetaData(i).mDataClass); EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, idxGrid->blindMetaData(i).mSemantic); - const std::string name = std::string("float_channel_") + std::to_string(i); + const std::string name = std::string("channel_") + std::to_string(i); EXPECT_EQ(0, std::strcmp(idxGrid->blindMetaData(i).mName, name.c_str() )); //mTimer.start("Parallel leaf iterator test of active voxels in channel"); - auto *values = reinterpret_cast(idxGrid->blindData(i)); + const float *values = idxGrid->getBlindData(i); + EXPECT_TRUE(values); nanovdb::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::Range1D &r){ auto fltAcc = fltTree.getAccessor();// NOT thread-safe! 
for (auto i=r.begin(); i!=r.end(); ++i){ @@ -6465,16 +7250,18 @@ TEST_F(TestNanoVDB, ChannelIndexGridBuilder) }; for (uint32_t i=0; ivalueCount(), idxGrid->blindMetaData(i).mElementCount); + EXPECT_EQ(idxGrid->valueCount(), idxGrid->blindMetaData(i).mValueCount); EXPECT_EQ(nanovdb::GridType::Float, idxGrid->blindMetaData(i).mDataType); EXPECT_EQ(nanovdb::GridBlindDataClass::ChannelArray, idxGrid->blindMetaData(i).mDataClass); EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, idxGrid->blindMetaData(i).mSemantic); - const std::string name = std::string("float_channel_") + std::to_string(i); + const std::string name = std::string("channel_") + std::to_string(i); EXPECT_EQ(0, std::strcmp(idxGrid->blindMetaData(i).mName, name.c_str() )); //mTimer.start("Parallel leaf iterator test of active voxels in channel"); - auto *values = reinterpret_cast(idxGrid->blindData(i)); + const float *values = idxGrid->getBlindData(i); + EXPECT_TRUE(values); nanovdb::forEach(0,idxTree.nodeCount(0),8,[&](const nanovdb::Range1D &r){ nanovdb::ChannelAccessor acc(*idxGrid, i);// NOT thread-safe + EXPECT_TRUE(acc); auto fltAcc = fltTree.getAccessor();// NOT thread-safe! 
float val; for (auto i=r.begin(); i!=r.end(); ++i){ @@ -6502,9 +7289,7 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Dense) EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - nanovdb::IndexGridBuilder builder(*fltGrid, true, true);// include stats and inactive values - auto tmp = builder.getHandle("IndexGrid_test", 1u);// 1 channel - nanovdb::io::writeGrid("data/index_grid.nvdb", tmp); + nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::createNanoGrid(*fltGrid,1u, true, true));// 1 channel, include stats and tile values } {// read and test IndexGrid auto tmp = nanovdb::io::readGrid("data/index_grid.nvdb"); @@ -6513,6 +7298,7 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Dense) //std::cerr << "Dense IndexGrid size: " << (idxGrid->gridSize() >> 20) << " MB\n"; EXPECT_GT(idxGrid->gridSize(), fltGrid->gridSize()); nanovdb::ChannelAccessor acc(*idxGrid, 0u);// channel ID = 0 + EXPECT_TRUE(acc); EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 @@ -6539,21 +7325,20 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse) EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); {// create an IndexGrid with an internal channel and write it to file - nanovdb::IndexGridBuilder builder(*fltGrid, false, false);// no stats, no inactive values - auto tmp = builder.getHandle("IndexGrid_test", 1u);// 1 channel - nanovdb::io::writeGrid("data/index_grid.nvdb", tmp); + nanovdb::io::writeGrid("data/index_grid.nvdb", nanovdb::createNanoGrid(*fltGrid, 1u, false, true));// 1 channel, no stats and include tile values } {// read and test IndexGrid auto tmp = nanovdb::io::readGrid("data/index_grid.nvdb"); - auto *idxGrid = tmp.grid(); + auto *idxGrid = tmp.grid(); EXPECT_TRUE(idxGrid); //std::cerr << "Sparse IndexGrid size: " << (idxGrid->gridSize() >> 20) << " MB\n"; EXPECT_LT(idxGrid->gridSize(), fltGrid->gridSize()); - nanovdb::ChannelAccessor acc(*idxGrid, 0u);// channel ID = 0 + nanovdb::ChannelAccessor 
acc(*idxGrid, 0u);// channel ID = 0 + EXPECT_TRUE(acc); EXPECT_EQ(1.0f, acc(ijk)); // compute the gradient from channel ID 0 - nanovdb::GradStencil> stencil(acc); + nanovdb::GradStencil> stencil(acc); stencil.moveTo(ijk); EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); @@ -6566,6 +7351,42 @@ TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse) } }// HelloWorld_IndexGrid_Sparse +TEST_F(TestNanoVDB, HelloWorld_IndexGrid_Sparse2) +{ + const nanovdb::Coord ijk(101,0,0); + auto handle1 = nanovdb::createLevelSetSphere(); + auto *fltGrid = handle1.grid(); + EXPECT_TRUE(fltGrid); + //std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); + + {// create an IndexGrid with an internal channel and write it to file + nanovdb::io::writeGrid("data/index_grid2.nvdb", nanovdb::createNanoGrid(*fltGrid, 1u, false, false));// 1 channel, no stats and no tile values + } + {// read and test IndexGrid + auto tmp = nanovdb::io::readGrid("data/index_grid2.nvdb"); + auto *idxGrid = tmp.grid(); + EXPECT_TRUE(idxGrid); + //std::cerr << "Sparse IndexGrid size: " << (idxGrid->gridSize() >> 20) << " MB\n"; + EXPECT_LT(idxGrid->gridSize(), fltGrid->gridSize()); + nanovdb::ChannelAccessor acc(*idxGrid, 0u);// channel ID = 0 + EXPECT_TRUE(acc); + EXPECT_EQ(1.0f, acc(ijk)); + + // compute the gradient from channel ID 0 + nanovdb::GradStencil> stencil(acc); + stencil.moveTo(ijk); + EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); + + EXPECT_EQ(0.0f, acc(100,0,0)); + acc(100,0,0) = 1.0f;// legal since acc was template on "float" and not "const float" + EXPECT_EQ(1.0f, acc(100,0,0)); + EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient());// since stencil caches + stencil.moveTo(ijk);// re-populates the stencil cache + EXPECT_EQ(nanovdb::Vec3f(0.5f,0.0f,0.0f), stencil.gradient()); + } +}// HelloWorld_IndexGrid_Sparse2 + TEST_F(TestNanoVDB, writeReadUncompressedGrid) { using GridHandleT = 
nanovdb::GridHandle; @@ -6585,8 +7406,346 @@ TEST_F(TestNanoVDB, writeReadUncompressedGrid) auto *fltGrid2 = handles2[0].grid(); EXPECT_TRUE(fltGrid2); EXPECT_EQ(1.0f, fltGrid2->tree().getValue(ijk)); +}// writeReadUncompressedGrid + + +TEST_F(TestNanoVDB, GridMetaData) +{ + auto handle = nanovdb::createLevelSetSphere(); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + nanovdb::GridMetaData meta(*grid);// deep copy + EXPECT_EQ(672 + 64 + 24 + 8, sizeof(meta)); + EXPECT_TRUE(nanovdb::GridMetaData::safeCast(*grid)); + auto *metaPtr = reinterpret_cast(grid); + EXPECT_EQ(meta.indexBBox(), metaPtr->indexBBox()); + EXPECT_EQ(meta.rootTableSize(), metaPtr->rootTableSize()); } +TEST_F(TestNanoVDB, BuildTree) +{ + nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(511)); + nanovdb::build::Grid grid1(false), grid2(false); + { + mTimer.start("Serial build::Tree"); + auto kernel = [&](const nanovdb::CoordBBox& bbox) { + auto acc = grid1.getAccessor(); + for (auto it = bbox.begin(); it; ++it) acc.setValueOn(*it); + }; + kernel(bbox); + mTimer.stop(); + } + { + mTimer.start("Parallel build::Tree"); + auto kernel = [&](const nanovdb::CoordBBox& bbox) { + auto acc = grid2.getWriteAccessor(); + for (auto it = bbox.begin(); it; ++it) acc.setValueOn(*it); + }; + nanovdb::forEach(bbox, kernel); + mTimer.stop(); + } + { + auto acc1 = grid1.getAccessor(), acc2 = grid2.getAccessor(); + for (auto it = bbox.begin(); it; ++it) { + EXPECT_EQ(acc1.getValue(*it), acc2.getValue(*it)); + } + } +}// BuildTree + +TEST_F(TestNanoVDB, CreateNanoGridFromFloat) +{ + using SrcGridT = nanovdb::FloatGrid; + const float tolerance = 0.001f; + const nanovdb::Coord ijk(101,0,0); + auto srcHandle = nanovdb::createLevelSetSphere(); + SrcGridT *srcGrid = srcHandle.grid(); + EXPECT_TRUE(srcGrid); + //std::cerr << "Grid size: " << (srcGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(1.0f, srcGrid->tree().getValue(ijk)); + + nanovdb::CreateNanoGrid converter(*srcGrid); + + {// create nanovdb::FloatGrid 
from nanovdb::FloatGrid + using DstBuildT = float; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::DoubleGrid from nanovdb::FloatGrid + using DstBuildT = double; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(1.0, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::Fp4Grid from nanovdb::FloatGrid + using DstBuildT = nanovdb::Fp4; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); + //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::Fp8Grid from nanovdb::FloatGrid + using DstBuildT = nanovdb::Fp8; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); + //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::Fp16Grid from nanovdb::FloatGrid + using DstBuildT = nanovdb::Fp16; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); + //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::FpNGrid from nanovdb::FloatGrid + using DstBuildT = nanovdb::FpN; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr 
<< "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_NEAR(1.0f, dstGrid->tree().getValue(ijk), tolerance); + //EXPECT_EQ(1.0f, dstGrid->tree().getValue(ijk)); + } + {// create nanovdb::MaskGrid from nanovdb::FloatGrid + using DstBuildT = nanovdb::ValueMask; + auto dstHandle = converter.getHandle(); + auto *dstGrid = dstHandle.grid(); + EXPECT_TRUE(dstGrid); + //std::cerr << "Grid<"<())<<"> size: " << (dstGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(true, dstGrid->tree().getValue(ijk)); + } +}// CreateNanoGridFromFloat + +TEST_F(TestNanoVDB, CreateNanoGridFromVec3f) +{ + using SrcBuildT = nanovdb::Vec3f; + using SrcGridT = nanovdb::build::Grid; + + // + const SrcBuildT a(1.5f,0.0f,-9.1f), b(0.0f,0.0f,0.0f); + SrcGridT grid(b); + const nanovdb::Coord p(0,0,7), q(0,0,0); + grid.setValue(p, a); + EXPECT_EQ(a, grid.tree().getValue(p)); + EXPECT_EQ(b, grid.tree().getValue(q)); + // + auto srcHandle = nanovdb::createNanoGrid(grid); + auto *srcGrid = srcHandle.grid(); + EXPECT_TRUE(srcGrid); + EXPECT_EQ(a, srcGrid->tree().getValue(p)); + EXPECT_EQ(b, srcGrid->tree().getValue(q)); + + {// create nanovdb::ValueIndexGrid from nanovdb::build::Grid + using DstBuildT = nanovdb::ValueIndex; + auto handle = nanovdb::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 512u, idxGrid->valueCount());// background and 512 leaf values + EXPECT_EQ(1, idxGrid->tree().getValue(q)); + EXPECT_EQ(8, idxGrid->tree().getValue(p)); + } + {// create nanovdb::ValueOnIndexGrid from nanovdb::build::Grid + using DstBuildT = nanovdb::ValueOnIndex; + auto handle = nanovdb::createNanoGrid(grid, 0u, false, false);// no channels, stats or tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 1u, idxGrid->valueCount());// background and one leaf value + EXPECT_EQ(0, 
idxGrid->tree().getValue(q)); + EXPECT_EQ(1, idxGrid->tree().getValue(p)); + } + {// create nanovdb::ValueIndexGrid from nanovdb::Grid + using DstBuildT = nanovdb::ValueIndex; + using SrcGridT = nanovdb::Vec3fGrid; + auto handle = nanovdb::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 512u, idxGrid->valueCount());// background and 512 leaf values + EXPECT_EQ(1, idxGrid->tree().getValue(q)); + EXPECT_EQ(8, idxGrid->tree().getValue(p)); + } + {// create nanovdb::ValueOnIndexGrid from nanovdb::Grid + using DstBuildT = nanovdb::ValueOnIndex; + using SrcGridT = nanovdb::Vec3fGrid; + auto handle = nanovdb::createNanoGrid(*srcGrid, 0u, false, false);// no channels, stats or tiles + auto *idxGrid = handle.grid(); + EXPECT_TRUE(idxGrid); + EXPECT_EQ(1u, idxGrid->activeVoxelCount()); + EXPECT_EQ(1u + 1u, idxGrid->valueCount());// background and 512 leaf values + EXPECT_EQ(0, idxGrid->tree().getValue(q)); + EXPECT_EQ(1, idxGrid->tree().getValue(p)); + } +}// CreateNanoGridFromVec3f + +TEST_F(TestNanoVDB, LongGridName) +{ + using SrcGridT = nanovdb::build::Grid; + nanovdb::GridData tmp; + tmp.init(); + EXPECT_EQ('\0', tmp.mGridName[0]); + for (int n = -10; n <= 10; ++n) { + SrcGridT srcGrid(0.0f); + const int limit = nanovdb::GridData::MaxNameSize - 1, length = limit + n; + char buffer[limit + 10 + 1] = {'\0'}; + srand (time(NULL)); + for (int i = 0; i < length; ++i) buffer[i] = 'a' + (rand() % 26);// a-z + buffer[length] = '\0'; + const std::string gridName(buffer); + //std::cout << "Long random grid name: " << gridName << std::endl; + EXPECT_EQ(gridName.length(), size_t(length)); + srcGrid.setName(gridName); + EXPECT_EQ(gridName, srcGrid.getName()); + srcGrid.tree().setValue(nanovdb::Coord(-256), 10.0f); + const bool isLong = length > limit; + auto handle = nanovdb::createNanoGrid(srcGrid); + auto* dstGrid = handle.grid(); + 
EXPECT_TRUE(dstGrid); + EXPECT_EQ(1u, dstGrid->activeVoxelCount()); + EXPECT_EQ(isLong ? 1u : 0u, dstGrid->blindDataCount()); + EXPECT_EQ(isLong, dstGrid->hasLongGridName()); + //std::cerr << "\nHas long grid name: " << (isLong?"yes":"no") << std::endl; + //std::cerr << "length = " << length << ", limit = " << limit << std::endl; + EXPECT_EQ(gridName, std::string(dstGrid->gridName())); + EXPECT_EQ( !isLong, std::string(dstGrid->shortGridName()) == std::string(dstGrid->gridName()) ); + EXPECT_EQ( 0.0, dstGrid->tree().getValue(nanovdb::Coord(-255))); + EXPECT_EQ(10.0, dstGrid->tree().getValue(nanovdb::Coord(-256))); + EXPECT_EQ(!isLong, tmp.setGridName(gridName.c_str())); + const char *ptr = dstGrid->getBlindData(0);// might be NULL + if (isLong) { + EXPECT_TRUE(ptr); + EXPECT_STREQ(buffer, dstGrid->gridName()); + EXPECT_STREQ(buffer, ptr); + EXPECT_EQ(ptr, dstGrid->gridName());// should point to the same memory + const nanovdb::GridBlindMetaData &blindMeta = dstGrid->blindMetaData(0); + //const nanovdb::GridBlindMetaData test = dstGrid->blindMetaData(0);// fails since + EXPECT_EQ(nanovdb::GridBlindDataClass::GridName, blindMeta.mDataClass); + EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, blindMeta.mSemantic); + EXPECT_EQ(nanovdb::GridType::Unknown, blindMeta.mDataType); + EXPECT_EQ(length + 1, blindMeta.mValueCount);// number of characters + terminating 0 + EXPECT_EQ(1u, blindMeta.mValueSize);// byte size of a character + EXPECT_TRUE(blindMeta.isValid()); + const char *str = blindMeta.getBlindData(); + EXPECT_TRUE(str); + //printf("ptr at address: %p\n", (const void*)ptr); + //printf("str at address: %p\n", (const void*)str); + EXPECT_EQ(str, ptr); + EXPECT_STREQ(buffer, ptr); + EXPECT_STREQ(buffer, str); + } else { + EXPECT_FALSE(ptr); + EXPECT_EQ(gridName, std::string(tmp.mGridName)); + for (int i = length; i<=limit; ++i) EXPECT_EQ('\0', tmp.mGridName[i]); + } + } +}// LongGridName + +TEST_F(TestNanoVDB, mergeSplitGrids) +{ + size_t size1 = 0, size2 = 0; + 
std::vector> handles1, handles2; + std::vector gridNames; + nanovdb::CpuTimer timer("create 5 host grids"); + for (int radius = 100; radius<150; radius += 10) { + gridNames.emplace_back("sphere_" + std::to_string(radius)); + handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + nanovdb::Vec3d(0), gridNames.back())); + EXPECT_FALSE(handles1.back().isPadded()); + size1 += handles1.back().size(); + } + EXPECT_EQ(5u, gridNames.size()); + EXPECT_EQ(5u, handles1.size()); + timer.restart("create 5 host grids"); + for (int radius = 150; radius<200; radius += 10) { + gridNames.emplace_back("sphere_" + std::to_string(radius)); + handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + nanovdb::Vec3d(0), gridNames.back())); + size2 += handles2.back().size(); + } + EXPECT_EQ(10u, gridNames.size()); + EXPECT_EQ( 5u, handles2.size()); + timer.restart("merging 5 host grids"); + auto mergedHandle = nanovdb::mergeGrids(handles2);// merge last 5 grid handles + EXPECT_EQ(size2, mergedHandle.size()); + EXPECT_FALSE(mergedHandle.isPadded()); + EXPECT_TRUE(mergedHandle.data()); + auto *gridData = mergedHandle.gridData();// first grid + EXPECT_TRUE(gridData); + EXPECT_EQ(5u, gridData->mGridCount); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(handles2[0].size(), gridData->mGridSize); + timer.restart("unit-test host grids"); + for (int i=0; i<5; ++i){ + gridData = mergedHandle.gridData(i); + EXPECT_TRUE(gridData); + EXPECT_EQ(i, gridData->mGridIndex); + EXPECT_EQ(handles2[i].size(), gridData->mGridSize); + EXPECT_EQ(strcmp(gridNames[i+5].c_str(), gridData->mGridName),0); + } + + EXPECT_FALSE(mergedHandle.empty()); + handles1.push_back(std::move(mergedHandle));// append one handle with 5 merged grids + EXPECT_TRUE(mergedHandle.empty()); + EXPECT_EQ(6u, handles1.size()); + +#if defined(NANOVDB_USE_BLOSC) + nanovdb::io::writeGrids("data/merge1.nvdb", handles1, nanovdb::io::Codec::BLOSC); +#elif defined(NANOVDB_USE_ZIP) + 
nanovdb::io::writeGrids("data/merge1.nvdb", handles1, nanovdb::io::Codec::ZIP); +#else + nanovdb::io::writeGrids("data/merge1.nvdb", handles1, nanovdb::io::Codec::NONE); +#endif + auto meta = nanovdb::io::readGridMetaData("data/merge1.nvdb"); + EXPECT_EQ(10u, meta.size()); + EXPECT_EQ(std::string("sphere_190"), meta.back().gridName); + auto handles3 = nanovdb::io::readGrids("data/merge1.nvdb"); + EXPECT_EQ(6u, handles3.size()); + auto& handle = handles3[5]; + EXPECT_EQ(5u, handle.gridCount()); + + timer.restart("merging 10 host grids"); + mergedHandle = nanovdb::mergeGrids(handles1); + EXPECT_EQ(size1 + size2, mergedHandle.size()); + EXPECT_TRUE(mergedHandle.data()); + gridData = mergedHandle.gridData();// first grid + EXPECT_TRUE(gridData); + EXPECT_EQ(10u, gridData->mGridCount); + EXPECT_EQ( 0u, gridData->mGridIndex); + EXPECT_EQ(handles1[0].size(), gridData->mGridSize); + + timer.restart("splitting host grids"); + auto splitHandles = nanovdb::splitGrids(mergedHandle); + timer.restart("unit-test split grids"); + EXPECT_EQ(10u, splitHandles.size()); + for (int i=0; i<5; ++i){ + EXPECT_EQ(handles1[i].size(), splitHandles[i].size()); + gridData = splitHandles[i].gridData(); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(1u, gridData->mGridCount); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + for (int i=5; i<10; ++i){ + EXPECT_EQ(handles2[i-5].size(), splitHandles[i].size()); + gridData = splitHandles[i].gridData(); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(1u, gridData->mGridCount); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + timer.stop(); +}// mergeSplitGrids + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu new file mode 100644 index 0000000000..524649b940 --- /dev/null +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -0,0 +1,2180 @@ +// Copyright Contributors to the OpenVDB 
Project +// SPDX-License-Identifier: MPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace nanovdb {// this namespace is required by gtest + +namespace test { +// used for testing CudaDeviceBuffer +void device2host(size_t count) +{ + const size_t size = count * sizeof(float); + auto buffer = nanovdb::CudaDeviceBuffer::create(size, nullptr, false);// on device only + EXPECT_EQ(size, buffer.size()); + EXPECT_FALSE(buffer.data()); + EXPECT_TRUE(buffer.deviceData()); + float *d_array = reinterpret_cast(buffer.deviceData()); + constexpr unsigned int num_threads = 256; + unsigned int num_blocks = num_blocks = (static_cast(count) + num_threads - 1) / num_threads; + cudaLambdaKernel<<>>(count, [=] __device__ (size_t i) {d_array[i] = float(i);}); + buffer.deviceDownload();// copy device -> host + EXPECT_EQ(size, buffer.size()); + EXPECT_TRUE(buffer.data()); + EXPECT_TRUE(buffer.deviceData()); + float *array = reinterpret_cast(buffer.data()); + for (size_t i=0; i(buffer.data()); + for (size_t i=0; i device + EXPECT_EQ(size, buffer.size()); + EXPECT_TRUE(buffer.data()); + EXPECT_TRUE(buffer.deviceData()); + float *d_array = reinterpret_cast(buffer.deviceData()); + constexpr unsigned int num_threads = 256; + unsigned int num_blocks = num_blocks = (static_cast(count) + num_threads - 1) / num_threads; + cudaLambdaKernel<<>>(count, [=] __device__ (size_t i) { + if (d_array[i] != float(i)) *d_test = false; + d_array[i] = float(i) + 1.0f; + }); + cudaCheck(cudaMemcpy(test, d_test, sizeof(bool), cudaMemcpyDeviceToHost)); + EXPECT_TRUE(*test); + cudaCheck(cudaFreeHost(test)); + cudaCheck(cudaFree(d_test)); + buffer.deviceDownload();// copy device -> host + EXPECT_EQ(size, buffer.size()); + EXPECT_TRUE(buffer.data()); + EXPECT_TRUE(buffer.deviceData()); + for (size_t i=0; i>>(1, [=] __device__ (size_t) { + cudaStrcpy(d_str, "this is a test"); + }); + cudaCheck(cudaMemcpy(str, 
d_str, size, cudaMemcpyDeviceToHost)); + EXPECT_STREQ(str, "this is a test"); + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + cudaStrcat(d_str, " #2"); + }); + cudaCheck(cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost)); + EXPECT_STREQ(str, "this is a test #2"); + + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + *d_n = cudaStrcmp(d_str, "this is a test"); + }); + cudaCheck(cudaMemcpy(&n, d_n, sizeof(int), cudaMemcpyDeviceToHost)); + //std::cerr << "n = " << n << std::endl; + EXPECT_EQ(std::strcmp(str, "this is a test"), n); + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__ (size_t) { + *d_n = cudaStrcmp(d_str, "this is a test #2"); + }); + cudaCheck(cudaMemcpy(&n, d_n, sizeof(int), cudaMemcpyDeviceToHost)); + EXPECT_EQ(std::strcmp(str, "this is a test #2"), n); + EXPECT_EQ(0, n); + + cudaCheck(cudaFreeHost(str)); + cudaCheck(cudaFree(d_n)); + cudaCheck(cudaFree(d_str)); +}// cudaStr +}// namespace test +}// namespace nanovdb + +TEST(TestNanoVDBCUDA, CudaDeviceBuffer) +{ + nanovdb::test::device2host(1000); + nanovdb::test::host2device2host(1000); +} + +TEST(TestNanoVDBCUDA, CudaStr) +{ + nanovdb::test::cudaStr(); +} + +TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_float) +{ + using BuildT = float; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 1; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); + cudaCheck(cudaFree(d_coords)); + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + 
sizeof(GridT::LowerNodeType) + + sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy up the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + + auto acc = grid->getAccessor(); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(2,2,3))); + auto *leaf = acc.probeLeaf(nanovdb::Coord(1,2,3)); + EXPECT_TRUE(leaf); + EXPECT_EQ(nanovdb::Coord(0), leaf->origin()); + EXPECT_EQ(1u, leaf->valueMask().countOn()); + EXPECT_EQ(nanovdb::Coord(1,2,3), leaf->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), leaf->bbox()[1]); +}// Basic_CudaPointsToGrid_float + +namespace nanovdb { +namespace test { + +/// @brief Implements Tree::probeValue(Coord) +/// @tparam BuildT Build type of the grid being called +template +struct ProbeValueNew { + using ValueT = typename BuildToValueMap::Type; + struct Probe { + bool state; + ValueT value; + operator bool() const { return state; } + }; + __hostdev__ static Probe get(const NanoRoot &root) { + return Probe{false, root.mBackground}; + } + __hostdev__ static Probe get(const typename NanoRoot::Tile &tile) { + return Probe{tile.state>0, tile.value}; + } + __hostdev__ static Probe get(const NanoUpper &node, uint32_t n) { + return Probe{node.mValueMask.isOn(n), node.mTable[n].value}; + } + __hostdev__ static Probe get(const NanoLower &node, uint32_t n) { + return Probe{node.mValueMask.isOn(n), node.mTable[n].value}; + } + __hostdev__ static Probe get(const NanoLeaf &leaf, uint32_t n) { + return Probe{leaf.isActive(n), leaf.getValue(n)}; + } +};// ProbeValueNew + +template +struct AccessLeafMask; + +// template specialization of AccessLeafMask wrt 
ValueOnIndexMask +template <> +struct AccessLeafMask{ + __hostdev__ static bool get(const NanoRoot&) {return false;} + __hostdev__ static bool get(const typename NanoRoot::Tile&) {return false;} + __hostdev__ static bool get(const NanoUpper&, uint32_t) {return false;} + __hostdev__ static bool get(const NanoLower&, uint32_t) {return false;} + __hostdev__ static bool get(const NanoLeaf &leaf, uint32_t n) {return leaf.mMask.isOn(n);} + __hostdev__ static void set(NanoRoot&) {} + __hostdev__ static void set(typename NanoRoot::Tile&) {} + __hostdev__ static void set(NanoUpper&, uint32_t) {} + __hostdev__ static void set(NanoLower&, uint32_t) {} + __hostdev__ static void set(NanoLeaf &leaf, uint32_t n) {leaf.mMask.setOn(n);} +};// AccessLeafMask + +}// end of test namespace +}// end of nanovdb namespace + +TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueIndex) +{ + using BuildT = nanovdb::ValueIndex; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 3; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), + nanovdb::Coord(1, 2, 4), + nanovdb::Coord(8, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU +#if 0 + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); +#else + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); +#endif + cudaCheck(cudaFree(d_coords)); + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + 2*sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + 
handle.deviceDownload();// creates a copy up the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(1u + 2*512u, grid->valueCount()); + + auto acc = grid->getAccessor(); + EXPECT_FALSE( acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(8,2,3))); + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(0,2,3)), acc.getValue(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(1,2,3)), acc.getValue(nanovdb::Coord(1,2,3))); + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(2,2,3)), acc.getValue(nanovdb::Coord(2,2,3))); + EXPECT_EQ(1u + 512u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(8,2,3)), acc.getValue(nanovdb::Coord(8,2,3))); + + using OpT = nanovdb::GetValue; + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(0,2,3)), acc.get(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(1,2,3)), acc.get(nanovdb::Coord(1,2,3))); + EXPECT_EQ(1u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(2,2,3)), acc.get(nanovdb::Coord(2,2,3))); + EXPECT_EQ(1u + 512u + nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(8,2,3)), acc.get(nanovdb::Coord(8,2,3))); + + for (size_t i=0; i>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + } +}// Basic_CudaPointsToGrid_ValueIndex + +TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndex) +{ + using BuildT = nanovdb::ValueOnIndex; + using GridT = nanovdb::NanoGrid; + EXPECT_TRUE(nanovdb::BuildTraits::is_index); + EXPECT_FALSE(nanovdb::BuildTraits::is_indexmask); + EXPECT_TRUE(nanovdb::BuildTraits::is_onindex); + EXPECT_FALSE(nanovdb::BuildTraits::is_offindex); + const size_t num_points 
= 3; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), + nanovdb::Coord(1, 2, 4), + nanovdb::Coord(8, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + +#if 0 + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); +#else + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); +#endif + + cudaCheck(cudaFree(d_coords)); + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + 2*sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy up the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(1u + num_points, grid->valueCount()); + + auto acc = grid->getAccessor(); + EXPECT_FALSE( acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(8,2,3))); + EXPECT_EQ(0u, acc.getValue(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u, acc.getValue(nanovdb::Coord(1,2,3))); + EXPECT_EQ(2u, acc.getValue(nanovdb::Coord(1,2,4))); + EXPECT_EQ(3u, acc.getValue(nanovdb::Coord(8,2,3))); + + using GetT = nanovdb::GetValue; + EXPECT_EQ(0u, acc.get(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u, acc.get(nanovdb::Coord(1,2,3))); + EXPECT_EQ(2u, acc.get(nanovdb::Coord(1,2,4))); + EXPECT_EQ(3u, acc.get(nanovdb::Coord(8,2,3))); + + { + using T = 
nanovdb::test::ProbeValueNew; + auto tmp = acc.get(nanovdb::Coord(0,2,3)); + EXPECT_EQ(false, tmp.state); + EXPECT_EQ(0u, tmp.value); + tmp = acc.get(nanovdb::Coord(1,2,3)); + EXPECT_EQ(true, tmp.state); + EXPECT_EQ(1u, tmp.value); + tmp = acc.get(nanovdb::Coord(1,2,4)); + EXPECT_EQ(true, tmp.state); + EXPECT_EQ(2u, tmp.value); + tmp = acc.get(nanovdb::Coord(8,2,3)); + EXPECT_EQ(true, tmp.state); + EXPECT_EQ(3u, tmp.value); + } + { + using T = nanovdb::ProbeValue; + uint64_t value = 0; + EXPECT_EQ(false, acc.get(nanovdb::Coord(0,2,3), value) ); + EXPECT_EQ(0u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,3), value) ); + EXPECT_EQ(1u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,4), value) ); + EXPECT_EQ(2u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(8,2,3), value) ); + EXPECT_EQ(3u, value); + } + + for (size_t i=0; i>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + } +}// Basic_CudaPointsToGrid_ValueOnIndex + +TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndexMask) +{ + using BuildT = nanovdb::ValueOnIndexMask; + using GridT = nanovdb::NanoGrid; + EXPECT_TRUE(nanovdb::BuildTraits::is_index); + EXPECT_TRUE(nanovdb::BuildTraits::is_indexmask); + EXPECT_TRUE(nanovdb::BuildTraits::is_onindex); + EXPECT_FALSE(nanovdb::BuildTraits::is_offindex); + const size_t num_points = 3; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), + nanovdb::Coord(1, 2, 4), + nanovdb::Coord(8, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + +#if 0 + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); +#else + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); +#endif + + cudaCheck(cudaFree(d_coords)); + EXPECT_TRUE(handle.deviceData());// grid 
only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + 2*sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy up the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(4u, grid->valueCount()); + + auto acc = grid->getAccessor(); + EXPECT_FALSE( acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(8,2,3))); + EXPECT_EQ(0u, acc.getValue(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u, acc.getValue(nanovdb::Coord(1,2,3))); + EXPECT_EQ(2u, acc.getValue(nanovdb::Coord(1,2,4))); + EXPECT_EQ(3u, acc.getValue(nanovdb::Coord(8,2,3))); + + using GetT = nanovdb::GetValue; + EXPECT_EQ(0u, acc.get(nanovdb::Coord(0,2,3))); + EXPECT_EQ(1u, acc.get(nanovdb::Coord(1,2,3))); + EXPECT_EQ(2u, acc.get(nanovdb::Coord(1,2,4))); + EXPECT_EQ(3u, acc.get(nanovdb::Coord(8,2,3))); + + using OpT = nanovdb::test::AccessLeafMask; + EXPECT_EQ(false, acc.get(nanovdb::Coord(0,2,3))); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,3))); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,4))); + EXPECT_EQ(true, acc.get(nanovdb::Coord(8,2,3))); + + acc.set(nanovdb::Coord(1,2,3)); + acc.set(nanovdb::Coord(8,2,3)); + + EXPECT_EQ(false, acc.get(nanovdb::Coord(0,2,3))); + EXPECT_EQ(true , acc.get(nanovdb::Coord(1,2,3))); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,4))); + EXPECT_EQ(true, acc.get(nanovdb::Coord(8,2,3))); + + { + using T = nanovdb::ProbeValue; + uint64_t value = 0; + EXPECT_EQ(false, 
acc.get(nanovdb::Coord(0,2,3), value) ); + EXPECT_EQ(0u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,3), value) ); + EXPECT_EQ(1u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(1,2,4), value) ); + EXPECT_EQ(2u, value); + EXPECT_EQ(true, acc.get(nanovdb::Coord(8,2,3), value) ); + EXPECT_EQ(3u, value); + EXPECT_EQ(false, acc.get(nanovdb::Coord(-18,2,3), value) ); + EXPECT_EQ(0u, value); + + EXPECT_EQ(false, grid->tree().get(nanovdb::Coord(0,2,3), value) ); + EXPECT_EQ(0u, value); + EXPECT_EQ(true, grid->tree().get(nanovdb::Coord(1,2,3), value) ); + EXPECT_EQ(1u, value); + EXPECT_EQ(true, grid->tree().get(nanovdb::Coord(1,2,4), value) ); + EXPECT_EQ(2u, value); + EXPECT_EQ(true, grid->tree().get(nanovdb::Coord(8,2,3), value) ); + EXPECT_EQ(3u, value); + EXPECT_EQ(false, grid->tree().get(nanovdb::Coord(-18,2,3), value) ); + EXPECT_EQ(0u, value); + } + + for (size_t i=0; i>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + EXPECT_EQ(leaf->mValueMask, leaf->mMask); + } +}// Basic_CudaPointsToGrid_ValueOnIndexMask + +TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) +{ + using BuildT = nanovdb::ValueOnIndex; + nanovdb::CpuTimer timer; + const size_t voxelCount = 1 << 20;// 1048576 + std::vector voxels; + {//generate random voxels + voxels.reserve(voxelCount); + std::srand(98765); + const int max = 512, min = -max; + auto op = [&](){return rand() % (max - min) + min;}; + timer.start("Creating "+std::to_string(voxelCount)+" random voxels on the CPU"); + while (voxels.size() < voxelCount) voxels.push_back(nanovdb::Coord(op(), op(), op())); + timer.stop(); + EXPECT_EQ(voxelCount, voxels.size()); + } +#if 0 + {// Build grid on CPU + nanovdb::build::Grid buildGrid(0.0f); + timer.start("Building grid on CPU from "+std::to_string(voxels.size())+" points"); + nanovdb::forEach(0, voxelCount, voxelCount >> 6, [&](const nanovdb::Range1D &r){ + auto acc = buildGrid.getWriteAccessor(); + for 
(size_t i=r.begin(); i!=r.end(); ++i) acc.setValueOn(voxels[i]); + }); + timer.restart("Converting CPU build::Grid to nanovdb"); + auto handle = nanovdb::createNanoGrid(buildGrid); + timer.stop(); + } +#endif + nanovdb::Coord* d_coords; + const size_t voxelSize = voxels.size() * sizeof(nanovdb::Coord); + //timer.start("Allocating "+std::to_string(voxelSize >> 20)+" MB on the GPU"); + cudaCheck(cudaMalloc(&d_coords, voxelSize)); + //timer.restart("Copying voxels from CPU to GPU"); + cudaCheck(cudaMemcpy(d_coords, voxels.data(), voxelSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(voxels.size())+" points"); + nanovdb::CudaPointsToGrid converter; + //converter.setVerbose(); + auto handle = converter.getHandle(d_coords, voxelCount); + timer.stop(); + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_TRUE(grid->valueCount()>0); + EXPECT_EQ(nanovdb::Vec3d(1.0), grid->voxelSize()); + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(voxels,[&](const nanovdb::Range1D &r){ + auto acc = grid->getAccessor(); + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord &ijk = voxels[i]; + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_TRUE(acc.getValue(ijk) > 0u); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = 
leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + } + }); + + //timer.stop(); +}// Large_CudaPointsToGrid_old + +TEST(TestNanoVDBCUDA, mergeSplitGrids) +{ + size_t size1 = 0, size2 = 0; + std::vector> handles1, handles2; + std::vector gridNames; + nanovdb::CpuTimer timer("create 5 host grids"); + for (int radius = 100; radius<150; radius += 10) { + gridNames.emplace_back("sphere_" + std::to_string(radius)); + handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + nanovdb::Vec3d(0), gridNames.back())); + EXPECT_FALSE(handles1.back().isPadded()); + size1 += handles1.back().size(); + } + EXPECT_EQ(5u, gridNames.size()); + EXPECT_EQ(5u, handles1.size()); + timer.restart("create 5 host grids"); + for (int radius = 150; radius<200; radius += 10) { + gridNames.emplace_back("sphere_" + std::to_string(radius)); + handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + nanovdb::Vec3d(0), gridNames.back())); + size2 += handles2.back().size(); + } + EXPECT_EQ(10u, gridNames.size()); + EXPECT_EQ( 5u, handles2.size()); + timer.restart("merging 5 host grids"); + auto mergedHandle = nanovdb::mergeGrids(handles2);// merge last 5 grid handles + EXPECT_EQ(size2, mergedHandle.size()); + EXPECT_FALSE(mergedHandle.isPadded()); + EXPECT_TRUE(mergedHandle.data()); + auto *gridData = mergedHandle.gridData();// first grid + EXPECT_TRUE(gridData); + EXPECT_EQ(5u, gridData->mGridCount); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(handles2[0].size(), gridData->mGridSize); + timer.restart("unit-test host grids"); + for (int i=0; i<5; ++i){ + gridData = mergedHandle.gridData(i); + EXPECT_TRUE(gridData); + EXPECT_EQ(i, gridData->mGridIndex); + EXPECT_EQ(handles2[i].size(), gridData->mGridSize); + EXPECT_EQ(strcmp(gridNames[i+5].c_str(), gridData->mGridName),0); + } + + EXPECT_FALSE(mergedHandle.empty()); + handles1.push_back(std::move(mergedHandle));// append one handle with 5 merged grids + 
EXPECT_TRUE(mergedHandle.empty()); + EXPECT_EQ(6u, handles1.size()); + timer.restart("merging 10 host grids"); + mergedHandle = nanovdb::mergeGrids(handles1); + EXPECT_EQ(size1 + size2, mergedHandle.size()); + EXPECT_TRUE(mergedHandle.data()); + gridData = mergedHandle.gridData();// first grid + EXPECT_TRUE(gridData); + EXPECT_EQ(10u, gridData->mGridCount); + EXPECT_EQ( 0u, gridData->mGridIndex); + EXPECT_EQ(handles1[0].size(), gridData->mGridSize); + + timer.restart("splitting host grids"); + auto splitHandles = nanovdb::splitGrids(mergedHandle); + timer.restart("unit-test split grids"); + EXPECT_EQ(10u, splitHandles.size()); + for (int i=0; i<5; ++i){ + EXPECT_EQ(handles1[i].size(), splitHandles[i].size()); + gridData = splitHandles[i].gridData(); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(1u, gridData->mGridCount); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + for (int i=5; i<10; ++i){ + EXPECT_EQ(handles2[i-5].size(), splitHandles[i].size()); + gridData = splitHandles[i].gridData(); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(1u, gridData->mGridCount); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + timer.stop(); +}// mergeSplitGrids + +TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) +{ + using BufferT = nanovdb::CudaDeviceBuffer; + using HandleT = nanovdb::GridHandle; + size_t size = 0; + std::vector handles; + std::vector gridNames; + nanovdb::CpuTimer timer("create 10 host grids"); + for (int radius = 100; radius<200; radius += 10) { + gridNames.emplace_back("sphere_" + std::to_string(radius)); + handles.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, + nanovdb::Vec3d(0), gridNames.back())); + EXPECT_FALSE(handles.back().isPadded()); + size += handles.back().size(); + } + timer.restart("copy grids to device"); + for (auto &h : handles) h.deviceUpload(); + EXPECT_EQ(10u, handles.size()); + timer.restart("merging device grids"); + auto mergedHandle = 
nanovdb::mergeDeviceGrids(handles); + EXPECT_EQ(size, mergedHandle.size()); + EXPECT_FALSE(mergedHandle.data()); + EXPECT_TRUE(mergedHandle.deviceData()); + EXPECT_FALSE(mergedHandle.isPadded()); + timer.restart("copy grids to host"); + mergedHandle.deviceDownload(); + EXPECT_TRUE(mergedHandle.data()); + EXPECT_TRUE(mergedHandle.deviceData()); + EXPECT_FALSE(mergedHandle.isPadded()); + auto *gridData = mergedHandle.gridData();// first grid + EXPECT_TRUE(gridData); + EXPECT_EQ(10u, gridData->mGridCount); + EXPECT_EQ(0u, gridData->mGridIndex); + timer.restart("unit-test host grids"); + for (uint32_t i=0; i<10; ++i) { + gridData = mergedHandle.gridData(i); + EXPECT_TRUE(gridData); + EXPECT_EQ(i, gridData->mGridIndex); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + timer.restart("splitting device grids"); + auto splitHandles = nanovdb::splitDeviceGrids(mergedHandle); + timer.restart("unit-test split grids"); + EXPECT_EQ(10u, splitHandles.size()); + for (uint32_t i=0u; i<10u; ++i) { + EXPECT_EQ(handles[i].size(), splitHandles[i].size()); + EXPECT_FALSE(splitHandles[i].isPadded()); + EXPECT_FALSE(splitHandles[i].gridData()); + splitHandles[i].deviceDownload(); + gridData = splitHandles[i].gridData(); + EXPECT_TRUE(gridData); + EXPECT_EQ(0u, gridData->mGridIndex); + EXPECT_EQ(1u, gridData->mGridCount); + EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); + } + timer.stop(); +}// mergeSplitDeviceGrids + +// make -j 4 testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*Cuda*" --gtest_break_on_failure +TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_basic) +{ + using BufferT = nanovdb::CudaDeviceBuffer; + const float value = 1.23456f, backgroud = 1.0f; + const nanovdb::Coord ijk(1,2,3); + nanovdb::GridHandle floatHdl; + nanovdb::FloatGrid *floatGrid = nullptr; + //nanovdb::CpuTimer timer; + {// create float grid with one active voxel + nanovdb::build::Grid grid(backgroud); + auto srcAcc = grid.getAccessor(); + srcAcc.setValue(ijk, value); + 
auto nodeCount = grid.nodeCount(); + EXPECT_EQ(1u, nodeCount[0]); + EXPECT_EQ(1u, nodeCount[1]); + EXPECT_EQ(1u, nodeCount[2]); + EXPECT_EQ(value, srcAcc.getValue(ijk)); + EXPECT_EQ(value, srcAcc.getValue(1,2,3)); + //timer.start("Create FloatGrid on CPU"); + floatHdl = nanovdb::createNanoGrid, float, BufferT>(grid); + EXPECT_TRUE(floatHdl); + floatGrid = floatHdl.grid(); + EXPECT_TRUE(floatGrid); + EXPECT_EQ(ijk, floatGrid->indexBBox()[0]); + EXPECT_EQ(ijk, floatGrid->indexBBox()[1]); + auto acc = floatGrid->getAccessor(); + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(-1))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(-1))); + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(8))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(8))); + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(0))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0))); + EXPECT_EQ(value, acc.getValue(ijk)); + EXPECT_TRUE(acc.isActive(ijk)); + } + //timer.restart("Create IndexGrid on CPU"); + using BufferT = nanovdb::CudaDeviceBuffer; + auto idxHdl = nanovdb::createNanoGrid(*floatGrid, 0u, false, false, 1); + //timer.restart("Copy IndexGrid from CPU to GPU"); + EXPECT_FALSE(idxHdl.deviceGrid()); + idxHdl.deviceUpload(); + EXPECT_TRUE(idxHdl.deviceGrid()); + auto *idxGrid = idxHdl.grid(); + EXPECT_TRUE(idxGrid); + //timer.restart("Create value list on CPU"); + EXPECT_EQ(1u + 512u, idxGrid->valueCount());// background + 512 values in one leaf node + float *values = new float[idxGrid->valueCount()], *d_values = nullptr; + values[0] = backgroud; + const float *q = floatGrid->tree().getFirstLeaf()->data()->mValues; + for (float *p=values+1, *e=p+512;p!=e; ++p) *p = *q++; + //timer.restart("Allocate and copy values from CPU to GPU"); + cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(float))); + EXPECT_TRUE(d_values); + cudaCheck(cudaMemcpy(d_values, values, idxGrid->valueCount()*sizeof(float), cudaMemcpyHostToDevice)); + EXPECT_FALSE(idxHdl.deviceGrid()); + auto *d_idxGrid = 
idxHdl.deviceGrid(); + EXPECT_TRUE(d_idxGrid); + //timer.restart("Call CudaIndexToGrid"); + auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + //timer.restart("unit-test"); + EXPECT_FALSE(hdl.grid());// no host grid + EXPECT_TRUE(hdl.deviceGrid()); + hdl.deviceDownload(); + auto *floatGrid2 = hdl.grid(); + EXPECT_TRUE(floatGrid2); + auto *leaf2 = floatGrid2->tree().getFirstLeaf(); + EXPECT_TRUE(leaf2); + auto acc = floatGrid->getAccessor(); + auto acc2 = floatGrid2->getAccessor(); + EXPECT_EQ(floatGrid->indexBBox(), floatGrid2->indexBBox()); + EXPECT_EQ(floatGrid->worldBBox(), floatGrid2->worldBBox()); + // probe background in root node + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(-1))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(-1))); + EXPECT_EQ(backgroud, acc2.getValue(nanovdb::Coord(-1))); + EXPECT_FALSE(acc2.isActive(nanovdb::Coord(-1))); + // probe background in upper node + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(128))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(128))); + EXPECT_EQ(backgroud, floatGrid2->tree().getValue(nanovdb::Coord(128))); + EXPECT_EQ(backgroud, acc2.getValue(nanovdb::Coord(128))); + EXPECT_FALSE(acc2.isActive(nanovdb::Coord(128))); + // probe background in leaf node + EXPECT_EQ(backgroud, acc.getValue(nanovdb::Coord(0))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0))); + EXPECT_EQ(backgroud, leaf2->getValue(nanovdb::Coord(0))); + EXPECT_FALSE(leaf2->isActive(nanovdb::Coord(0))); + EXPECT_EQ(backgroud, floatGrid2->tree().getValue(nanovdb::Coord(0))); + EXPECT_EQ(backgroud, acc2.getValue(nanovdb::Coord(0))); + EXPECT_FALSE(acc2.isActive(nanovdb::Coord(0))); + + EXPECT_EQ(value, acc2.getValue(ijk)); + EXPECT_TRUE(acc2.isActive(ijk)); + //timer.stop(); + cudaFree(d_values); +}// CudaIndexGridToGrid_basic + +TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueIndex) +{ + using BuildT = nanovdb::ValueIndex; + using BufferT = nanovdb::CudaDeviceBuffer; + //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); + auto floatHdl 
= nanovdb::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); + auto *floatGrid = floatHdl.grid(); + EXPECT_TRUE(floatGrid); + auto acc = floatGrid->getAccessor(); + //timer.restart("Create IndexGrid on CPU"); + auto idxHdl = nanovdb::createNanoGrid(*floatGrid); + //timer.restart("Copy IndexGrid from CPU to GPU"); + idxHdl.deviceUpload(); + auto *idxGrid = idxHdl.grid(); + EXPECT_TRUE(idxGrid); + //timer.restart("Create value list on CPU"); + float *values = new float[idxGrid->valueCount()], *d_values = nullptr; + values[0] = floatGrid->tree().root().background(); + for (auto it = floatGrid->indexBBox().begin(); it; ++it) { + EXPECT_EQ(acc.isActive(*it), idxGrid->tree().isActive(*it)); + const uint64_t idx = idxGrid->tree().getValue(*it); + EXPECT_TRUE(idx < idxGrid->valueCount()); + values[idx] = acc.getValue(*it); + } + //timer.restart("Allocate and copy values from CPU to GPU"); + cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(float))); + cudaCheck(cudaMemcpy(d_values, values, idxGrid->valueCount()*sizeof(float), cudaMemcpyHostToDevice)); + EXPECT_FALSE(idxHdl.deviceGrid()); + auto *d_idxGrid = idxHdl.deviceGrid(); + EXPECT_TRUE(d_idxGrid); + //timer.restart("Call CudaIndexToGrid"); + auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + //timer.restart("unit-test"); + EXPECT_FALSE(hdl.grid());// no host grid + EXPECT_TRUE(hdl.deviceGrid()); + hdl.deviceDownload(); + auto *floatGrid2 = hdl.grid(); + EXPECT_TRUE(floatGrid2); + auto acc2 = floatGrid2->getAccessor(); + EXPECT_EQ(floatGrid->indexBBox(), floatGrid2->indexBBox()); + EXPECT_EQ(floatGrid->worldBBox(), floatGrid2->worldBBox()); + EXPECT_EQ(floatGrid->tree().root().background(), floatGrid2->tree().root().background()); + for (auto it = floatGrid->indexBBox().begin(); it; ++it) { + EXPECT_EQ(acc.isActive(*it), acc2.isActive(*it)); + EXPECT_EQ(acc.getValue(*it), acc2.getValue(*it)); + } + //timer.stop(); + cudaFree(d_values); +}// 
CudaPointToGrid_ValueIndex + +TEST(TestNanoVDBCUDA, CudaIndexGridToGrid_ValueOnIndex) +{ + using BuildT = nanovdb::ValueOnIndex; + using BufferT = nanovdb::CudaDeviceBuffer; + //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); + auto floatHdl = nanovdb::createLevelSetSphere(100,nanovdb::Vec3d(0),1,3, nanovdb::Vec3d(0), "test"); + auto *floatGrid = floatHdl.grid(); + EXPECT_TRUE(floatGrid); + auto acc = floatGrid->getAccessor(); + //timer.restart("Create IndexGrid on CPU"); + auto idxHdl = nanovdb::createNanoGrid(*floatGrid); + //timer.restart("Copy IndexGrid from CPU to GPU"); + idxHdl.deviceUpload(); + auto *idxGrid = idxHdl.grid(); + EXPECT_TRUE(idxGrid); + //timer.restart("Create value list on CPU"); + float *values = new float[idxGrid->valueCount()], *d_values = nullptr; + values[0] = floatGrid->tree().root().background(); + for (auto it = floatGrid->indexBBox().begin(); it; ++it) { + EXPECT_EQ(acc.isActive(*it), idxGrid->tree().isActive(*it)); + if (acc.isActive(*it)) { + const uint64_t idx = idxGrid->tree().getValue(*it); + EXPECT_TRUE(idx < idxGrid->valueCount()); + values[idx] = acc.getValue(*it); + } + } + //timer.restart("Allocate and copy values from CPU to GPU"); + cudaCheck(cudaMalloc((void**)&d_values, idxGrid->valueCount()*sizeof(float))); + cudaCheck(cudaMemcpy(d_values, values, idxGrid->valueCount()*sizeof(float), cudaMemcpyHostToDevice)); + EXPECT_FALSE(idxHdl.deviceGrid()); + auto *d_idxGrid = idxHdl.deviceGrid(); + EXPECT_TRUE(d_idxGrid); + //timer.restart("Call CudaIndexToGrid"); + auto hdl = nanovdb::cudaIndexToGrid(d_idxGrid, d_values); + //timer.restart("unit-test"); + EXPECT_FALSE(hdl.grid());// no host grid + EXPECT_TRUE(hdl.deviceGrid()); + hdl.deviceDownload(); + auto *floatGrid2 = hdl.grid(); + EXPECT_TRUE(floatGrid2); + auto acc2 = floatGrid2->getAccessor(); + EXPECT_EQ(floatGrid->indexBBox(), floatGrid2->indexBBox()); + EXPECT_EQ(floatGrid->worldBBox(), floatGrid2->worldBBox()); + EXPECT_EQ(floatGrid->tree().root().background(), 
floatGrid2->tree().root().background()); + for (auto it = floatGrid->indexBBox().begin(); it; ++it) { + EXPECT_EQ(acc.isActive(*it), acc2.isActive(*it)); + if (acc.isActive(*it)) EXPECT_EQ(acc.getValue(*it), acc2.getValue(*it)); + } + //timer.stop(); + cudaFree(d_values); +}// CudaPointToGrid_ValueOnIndex + +TEST(TestNanoVDBCUDA, CudaSignedFloodFill) +{ + using BufferT = nanovdb::CudaDeviceBuffer; + //nanovdb::CpuTimer timer("Create FloatGrid on CPU"); + auto floatHdl = nanovdb::createLevelSetSphere(100); + auto *floatGrid = floatHdl.grid(); + EXPECT_TRUE(floatGrid); + auto acc = floatGrid->getAccessor(); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(103,0,0))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(100,0,0))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord( 97,0,0))); + EXPECT_EQ( 3.0f, acc(103,0,0)); + EXPECT_EQ( 0.0f, acc(100,0,0)); + EXPECT_EQ(-3.0f, acc( 97,0,0)); + using OpT = nanovdb::SetVoxel;// only set the voxel value + acc.set(nanovdb::Coord(103,0,0),-1.0f);// flip sign and value of inactive voxel + acc.set(nanovdb::Coord( 97,0,0), 1.0f);// flip sign and value of inactive voxel + EXPECT_EQ(-1.0f, acc(103,0,0)); + EXPECT_EQ( 0.0f, acc(100,0,0)); + EXPECT_EQ( 1.0f, acc( 97,0,0)); + //timer.restart("Copy FloatGrid from CPU to GPU"); + floatHdl.deviceUpload();// CPU -> GPU + auto *d_floatGrid = floatHdl.deviceGrid(); + EXPECT_TRUE(d_floatGrid); + //timer.restart("Signed flood-fill on the GPU"); + //nanovdb::cudaSignedFloodFill(d_floatGrid, true); + nanovdb::cudaSignedFloodFill(d_floatGrid); + //timer.restart("Copy FloatGrid from GPU to CPU"); + floatHdl.deviceDownload();// GPU -> CPU + //timer.stop(); + floatGrid = floatHdl.grid(); + EXPECT_TRUE(floatGrid); + acc = floatGrid->getAccessor(); + EXPECT_EQ( 3.0f, acc(103,0,0)); + EXPECT_EQ( 0.0f, acc(100,0,0)); + EXPECT_EQ(-3.0f, acc( 97,0,0)); +}// CudaSignedFloodFill + +TEST(TestNanoVDBCUDA, OneVoxelToGrid) +{ + using BuildT = float; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 1; + 
nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + + //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); + cudaCheck(cudaFree(d_coords)); + //timer.stop(); + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + //timer.start("Copy data from GPU to CPU"); + handle.deviceDownload();// creates a copy up the CPU + //timer.stop(); + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + + //timer.start("Unit-testing grid on the CPU"); + auto acc = grid->getAccessor(); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(2,2,3))); + auto *leaf = acc.probeLeaf(nanovdb::Coord(1,2,3)); + EXPECT_TRUE(leaf); + EXPECT_EQ(nanovdb::Coord(0), leaf->origin()); + EXPECT_EQ(1u, leaf->valueMask().countOn()); + EXPECT_EQ(nanovdb::Coord(1,2,3), leaf->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), leaf->bbox()[1]); + auto *lower = acc.getNode<1>(); + EXPECT_TRUE(lower); + EXPECT_EQ(nanovdb::Coord(1,2,3), lower->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), lower->bbox()[1]); + auto *upper = acc.getNode<2>(); + 
EXPECT_TRUE(upper); + EXPECT_EQ(nanovdb::Coord(1,2,3), upper->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), upper->bbox()[1]); + EXPECT_EQ(nanovdb::Coord(1,2,3), acc.root().bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), acc.root().bbox()[1]); + //timer.stop(); +}// OneVoxelToGrid + +TEST(TestNanoVDBCUDA, ThreePointsToGrid) +{ + using BuildT = nanovdb::Points; + using Vec3T = nanovdb::Vec3f; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 3; + Vec3T points[num_points] = {Vec3T(1, 0, 0),Vec3T(1, 2, 3),Vec3T(1, 2, 3)}, *d_points = nullptr; + cudaCheck(cudaMalloc(&d_points, num_points * sizeof(Vec3T))); + cudaCheck(cudaMemcpy(d_points, points, num_points * sizeof(Vec3T), cudaMemcpyHostToDevice));// CPU -> GPU + + //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_points, num_points); + cudaCheck(cudaFree(d_points)); + //timer.stop(); + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + sizeof(GridT::LeafNodeType) + + sizeof(nanovdb::GridBlindMetaData) + + num_points*sizeof(Vec3T); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + //timer.start("Copy data from GPU to CPU"); + handle.deviceDownload();// creates a copy on the CPU + //timer.stop(); + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(1u, grid->blindDataCount()); + const Vec3T *blindData = grid->getBlindData(0); + EXPECT_TRUE(blindData); + for (const Vec3T *p = blindData, *q=p+num_points, *ptr=points; p!=q; ++p) { + EXPECT_EQ(*ptr++, *p); + } + //timer.start("Unit-testing grid 
on the CPU"); + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,0,0))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,2,3))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(2,2,3))); + auto *leaf = acc.probeLeaf(nanovdb::Coord(1,2,3)); + EXPECT_TRUE(leaf); + EXPECT_EQ(nanovdb::Coord(0), leaf->origin()); + EXPECT_EQ(2u, leaf->valueMask().countOn()); + EXPECT_EQ(nanovdb::Coord(1,0,0), leaf->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), leaf->bbox()[1]); + nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(7)); + for (auto it = bbox.begin(); it; ++it) { + //std::cerr << *it << " offset = " << leaf->CoordToOffset(*it) << " value = " << leaf->getValue(*it) << std::endl; + if (*it < nanovdb::Coord(1,0,0)) { + EXPECT_EQ(0u, leaf->getValue(*it)); + } else if (*it < nanovdb::Coord(1,2,3)) { + EXPECT_EQ(1u, leaf->getValue(*it)); + } else { + EXPECT_EQ(3u, leaf->getValue(*it)); + } + } + const Vec3T *start=nullptr, *stop=nullptr; + + EXPECT_EQ(0u, acc.voxelPoints(nanovdb::Coord(0,0,0), start, stop)); + EXPECT_FALSE(start); + EXPECT_FALSE(stop); + + EXPECT_EQ(1u, acc.voxelPoints(nanovdb::Coord(1,0,0), start, stop)); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_EQ(Vec3T(1, 0, 0), start[0]); + + EXPECT_EQ(2u, acc.voxelPoints(nanovdb::Coord(1,2,3), start, stop)); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_EQ(Vec3T(1, 2, 3), start[0]); + EXPECT_EQ(Vec3T(1, 2, 3), start[1]); + + auto *lower = acc.getNode<1>(); + EXPECT_TRUE(lower); + EXPECT_EQ(nanovdb::Coord(1,0,0), lower->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), lower->bbox()[1]); + auto *upper = acc.getNode<2>(); + EXPECT_TRUE(upper); + EXPECT_EQ(nanovdb::Coord(1,0,0), upper->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,2,3), upper->bbox()[1]); + EXPECT_EQ(nanovdb::Coord(1,0,0), acc.root().bbox()[0]); + 
EXPECT_EQ(nanovdb::Coord(1,2,3), acc.root().bbox()[1]); + //timer.stop(); +}// ThreePointsToGrid + +TEST(TestNanoVDBCUDA, EightVoxelsToFloatGrid) +{ + using BuildT = float; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 8; + //std::cerr << nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord( 1, 1, 1)) << std::endl; + //std::cerr << nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord(-7, 1, 1)) << std::endl; + //std::cerr << nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord( 1,-7, 1)) << std::endl; + //std::cerr << nanovdb::NanoLeaf::CoordToOffset(nanovdb::Coord( 1,-7, 1)) << std::endl; + nanovdb::Coord coords[num_points] = {nanovdb::Coord( 1, 1, 1), + nanovdb::Coord(-7, 1, 1), + nanovdb::Coord( 1,-7, 1), + nanovdb::Coord( 1, 1,-7), + nanovdb::Coord(-7,-7, 1), + nanovdb::Coord(-7, 1,-7), + nanovdb::Coord( 1,-7,-7), + nanovdb::Coord(-7,-7,-7)}, *d_coords = nullptr; + for (int i=0; i<8; ++i) EXPECT_EQ(73u, nanovdb::NanoLeaf::CoordToOffset(coords[i])); + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + + //nanovdb::GpuTimer timer("Create FloatGrid on GPU"); + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); + //timer.stop(); + cudaCheck(cudaFree(d_coords)); + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(8) + + 8*sizeof(GridT::UpperNodeType) + + 8*sizeof(GridT::LowerNodeType) + + 8*sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + //timer.start("Copy data from GPU to CPU"); + handle.deviceDownload();// creates a copy up the CPU + //timer.stop(); + EXPECT_TRUE(handle.deviceData()); + 
EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + + //timer.start("Unit-testing grid on the CPU"); + auto acc = grid->getAccessor(); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(0,2,3))); + EXPECT_TRUE( acc.isActive(nanovdb::Coord(1,1,1))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(1,2,4))); + EXPECT_FALSE(acc.isActive(nanovdb::Coord(2,2,3))); + auto *leaf = acc.probeLeaf(nanovdb::Coord(1,0,0)); + EXPECT_TRUE(leaf); + EXPECT_EQ(nanovdb::Coord(0), leaf->origin()); + EXPECT_EQ(1u, leaf->valueMask().countOn()); + EXPECT_EQ(nanovdb::Coord( 1, 1, 1), leaf->bbox()[0]); + EXPECT_EQ(nanovdb::Coord( 1, 1, 1), leaf->bbox()[1]); + auto *lower = acc.getNode<1>(); + EXPECT_TRUE(lower); + EXPECT_EQ(nanovdb::Coord(1,1,1), lower->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,1,1), lower->bbox()[1]); + auto *upper = acc.getNode<2>(); + EXPECT_TRUE(upper); + EXPECT_EQ(nanovdb::Coord(1,1,1), upper->bbox()[0]); + EXPECT_EQ(nanovdb::Coord(1,1,1), upper->bbox()[1]); + EXPECT_EQ(nanovdb::Coord(-7,-7,-7), acc.root().bbox()[0]); + EXPECT_EQ(nanovdb::Coord( 1, 1, 1), acc.root().bbox()[1]); + //timer.stop(); +}// EightVoxelsToFloatGrid + +TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) +{ + using BuildT = nanovdb::Points;//uint32_t; + using Vec3T = nanovdb::Vec3d; + //nanovdb::CpuTimer timer; + const size_t pointCount = 1 << 20;// 1048576 + std::vector points; + //generate random points + points.reserve(pointCount); + std::srand(98765); + const int max = 512, min = -max; + auto op = [&](){return rand() % (max - min) + min;}; + //timer.start("Creating "+std::to_string(pointCount)+" random points on the CPU"); + while (points.size() < pointCount) points.emplace_back(op(), op(), op()); + //timer.stop(); + EXPECT_EQ(pointCount, points.size()); + Vec3T* d_points; + const size_t pointSize = points.size() * sizeof(Vec3T); + //std::cerr << "Point footprint: " << (pointSize >> 20) << " MB" << std::endl; + 
//timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, points.data(), pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + const double voxelSize = 8.0; + //timer.start("Building grid on GPU from "+std::to_string(points.size())+" points"); + nanovdb::CudaPointsToGrid converter(voxelSize);// unit map + //converter.setVerbose(); + auto handle = converter.getHandle(d_points, pointCount); + //timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); + EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); + //std::cerr << grid->indexBBox() << std::endl; + EXPECT_STREQ("World64: Vec3 point coordinates in world space", grid->blindMetaData(0).mName); 
+ { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(points,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const Vec3T *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(points[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && j points; + //generate random points + points.reserve(pointCount); + std::srand(98765); + const int max = 512, min = -max; + auto op = [&](){return rand() % (max - min) + min;}; + //timer.start("Creating "+std::to_string(pointCount)+" random points on the CPU"); + while (points.size() < pointCount) points.emplace_back(op(), op(), op()); + //timer.stop(); + EXPECT_EQ(pointCount, points.size()); + Vec3T* d_points; + const size_t pointSize = points.size() * sizeof(Vec3T); + //std::cerr << "Point footprint: " << (pointSize >> 20) << " MB" << 
std::endl; + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, points.data(), pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + const double voxelSize = 8.0; + //timer.start("Building grid on GPU from "+std::to_string(points.size())+" points"); + nanovdb::CudaPointsToGrid converter(voxelSize);// unit map + //converter.setVerbose(); + auto handle = converter.getHandle(d_points, pointCount); + //timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); + EXPECT_EQ(pointCount, grid->pointCount()); + EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("World64: Vec3 point 
coordinates in world space", grid->blindMetaData(0).mName); + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(points,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const Vec3T *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(points[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && j( (points[i] - xyz).lengthSqr() ); + } + EXPECT_TRUE(test); + } + }); + + //timer.stop(); +}// Large_CudaPointsToGrid_World64 + +TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) +{ + using BuildT = nanovdb::Points; + using Vec3T = nanovdb::Vec3f; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + timer.stop(); + + auto *pointGrid = pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + 
nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const Vec3T *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + const size_t pointSize = pointCount * sizeof(Vec3T); + //std::cerr << "Point count = " << pointCount << ", point footprint: " << (pointSize >> 20) << " MB" << std::endl; + //std::cerr << "Upper count: " << pointGrid->tree().nodeCount(2) << ", lower count: " << pointGrid->tree().nodeCount(1) + // << ", leaf count: " << pointGrid->tree().nodeCount(0) << ", voxelSize = " << pointGrid->voxelSize()[0] << std::endl; + + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + Vec3T* d_points; + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + nanovdb::CudaPointsToGrid converter(pointGrid->map()); + //converter.setVerbose(); + auto handle = converter.getHandle(d_points, pointCount); + timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid 
from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("World32: Vec3 point coordinates in world space", grid->blindMetaData(0).mName); + + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const Vec3T *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(begin[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && j(); + 
EXPECT_TRUE(pointGrid); + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const Vec3T *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + const size_t pointSize = pointCount * sizeof(Vec3T); + //std::cerr << "Point count = " << pointCount << ", point footprint: " << (pointSize >> 20) << " MB" << std::endl; + //std::cerr << "Upper count: " << pointGrid->tree().nodeCount(2) << ", lower count: " << pointGrid->tree().nodeCount(1) + // << ", leaf count: " << pointGrid->tree().nodeCount(0) << ", voxelSize = " << pointGrid->voxelSize()[0] << std::endl; + + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + Vec3T* d_points; + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + ///////////////////////////////////////////////////////////////////////// + nanovdb::CudaPointsToGrid converter(pointGrid->map()); + //converter.setVerbose(); + converter.setPointType(nanovdb::PointType::Voxel32); + auto handle = converter.getHandle(d_points, pointCount); + ///////////////////////////////////////////////////////////////////////// + timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + 
EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("Voxel32: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); + + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const Vec3T *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(begin[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, 
leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && jmap())).length() < 1e-9; + } + EXPECT_TRUE(test); + } + }); + + //timer.stop(); +}// Sphere_CudaPointsToGrid_Voxel32 + +TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) +{ + EXPECT_EQ(6u, sizeof(nanovdb::Vec3u16)); + using BuildT = nanovdb::Points; + using Vec3T = nanovdb::Vec3f; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + timer.stop(); + + auto *pointGrid = pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const Vec3T *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + const size_t pointSize = pointCount * sizeof(Vec3T); + //std::cerr << "Point count = " << pointCount << ", point footprint: " << (pointSize >> 20) << " MB" << std::endl; + //std::cerr << "Upper count: " << pointGrid->tree().nodeCount(2) << ", lower count: " << pointGrid->tree().nodeCount(1) + // << ", leaf count: " << pointGrid->tree().nodeCount(0) << ", voxelSize = " << pointGrid->voxelSize()[0] << std::endl; + + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + Vec3T* d_points; + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + ///////////////////////////////////////////////////////////////////////// + nanovdb::CudaPointsToGrid converter(pointGrid->map()); + //converter.setVerbose(); + 
converter.setPointType(nanovdb::PointType::Voxel16); + auto handle = converter.getHandle(d_points, pointCount); + ///////////////////////////////////////////////////////////////////////// + timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("Voxel16: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); + + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if 
(j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const nanovdb::Vec3u16 *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(begin[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && jmap())).length() < 1e-6; + } + } + }); + + //timer.stop(); +}// Sphere_CudaPointsToGrid_Voxel16 + +TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) +{ + EXPECT_EQ(3u, sizeof(nanovdb::Vec3u8)); + + using BuildT = nanovdb::Points; + using Vec3T = nanovdb::Vec3f; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + timer.stop(); + + auto *pointGrid = pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const Vec3T *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + const size_t pointSize = pointCount * sizeof(Vec3T); + //std::cerr << "Point count = " << pointCount << ", point footprint: " << 
(pointSize >> 20) << " MB" << std::endl; + //std::cerr << "Upper count: " << pointGrid->tree().nodeCount(2) << ", lower count: " << pointGrid->tree().nodeCount(1) + // << ", leaf count: " << pointGrid->tree().nodeCount(0) << ", voxelSize = " << pointGrid->voxelSize()[0] << std::endl; + + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + Vec3T* d_points; + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + ///////////////////////////////////////////////////////////////////////// + //auto handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); + nanovdb::CudaPointsToGrid converter(pointGrid->map()); + //converter.setVerbose(); + converter.setPointType(nanovdb::PointType::Voxel8); + auto handle = converter.getHandle(d_points, pointCount); + ///////////////////////////////////////////////////////////////////////// + timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = 
handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("Voxel8: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); + + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const nanovdb::Vec3u8 *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(begin[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + bool test = false; + for (uint64_t j=0; test == false && jmap())).length() < 1e-2; + } + EXPECT_TRUE(test); 
+ } + }); + //timer.stop(); +}// Sphere_CudaPointsToGrid_Voxel8 + +TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) +{ + EXPECT_EQ(3u, sizeof(nanovdb::Vec3u8)); + + using BuildT = nanovdb::Points; + using Vec3T = nanovdb::Vec3f; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); + timer.stop(); + + auto *pointGrid = pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const Vec3T *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + const size_t pointSize = pointCount * sizeof(Vec3T); + //std::cerr << "Point count = " << pointCount << ", point footprint: " << (pointSize >> 20) << " MB" << std::endl; + //std::cerr << "Upper count: " << pointGrid->tree().nodeCount(2) << ", lower count: " << pointGrid->tree().nodeCount(1) + // << ", leaf count: " << pointGrid->tree().nodeCount(0) << ", voxelSize = " << pointGrid->voxelSize()[0] << std::endl; + + //timer.start("Allocating "+std::to_string(pointSize >> 20)+" MB on the GPU"); + Vec3T* d_points; + cudaCheck(cudaMalloc(&d_points, pointSize)); + //timer.restart("Copying points from CPU to GPU"); + cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); + //timer.stop(); + + timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + ///////////////////////////////////////////////////////////////////////// + //auto handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); + nanovdb::CudaPointsToGrid converter(pointGrid->map()); + converter.setVerbose(2); + converter.setPointType(nanovdb::PointType::PointID); + auto handle = converter.getHandle(d_points, pointCount); + 
///////////////////////////////////////////////////////////////////////// + timer.stop(); + cudaCheck(cudaFree(d_points)); + //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; + + const uint32_t maxPointsPerVoxel = converter.maxPointsPerVoxel(); + const uint32_t maxPointsPerLeaf = converter.maxPointsPerLeaf(); + EXPECT_GT(maxPointsPerVoxel, 0u); + EXPECT_LT(maxPointsPerLeaf, 1024u); + EXPECT_LE(maxPointsPerVoxel, maxPointsPerLeaf); + //std::cerr << "maxPointsPerLeaf = " << maxPointsPerLeaf << " maxPointsPerVoxel = " << maxPointsPerVoxel << std::endl; + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_TRUE(handle.deviceGrid()); + EXPECT_FALSE(handle.deviceGrid(0)); + EXPECT_TRUE(handle.deviceGrid(0)); + EXPECT_FALSE(handle.deviceGrid(1)); + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + //timer.start("Allocating and copying grid from GPU to CPU"); + auto *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle.deviceData()); + EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + //std::cerr << grid->indexBBox() << std::endl; + + EXPECT_STREQ("PointID: uint32_t indices to points", grid->blindMetaData(0).mName); + + { + auto mgrHdl = nanovdb::createNodeManager(*grid); + auto *mgr = mgrHdl.mgr(); + EXPECT_TRUE(mgr); + for (uint32_t i=0; ileafCount(); ++i) { + const auto &leaf = mgr->leaf(i); + for (int j=0; j<512; ++j) { + EXPECT_LE(leaf.getValue(j), maxPointsPerLeaf); + if (leaf.isActive(j)) { + if (j>0) { + EXPECT_LE(leaf.getValue(j) - leaf.getValue(j-1), maxPointsPerVoxel); + } else { + EXPECT_LE(leaf.getValue(0), maxPointsPerVoxel); + } + } else if (j>0) { + EXPECT_EQ(leaf.getValue(j), leaf.getValue(j-1)); + } else { + EXPECT_EQ(leaf.getValue(0), 0u); + } + }// loop over 
voxels + }// loop over leaf nodes + } + + //timer.restart("Parallel unit-testing on CPU"); + nanovdb::forEach(0u, pointCount, 1u,[&](const nanovdb::Range1D &r){ + nanovdb::PointAccessor acc(*grid); + EXPECT_TRUE(acc); + const uint32_t *start = nullptr, *stop = nullptr; + for (size_t i=r.begin(); i!=r.end(); ++i) { + const nanovdb::Coord ijk = grid->worldToIndex(begin[i]).round(); + EXPECT_TRUE(acc.probeLeaf(ijk)!=nullptr); + EXPECT_TRUE(acc.isActive(ijk)); + EXPECT_LE(acc.getValue(ijk), pointCount); + const auto *leaf = acc.get>(ijk); + EXPECT_TRUE(leaf); + const auto offset = leaf->CoordToOffset(ijk); + EXPECT_EQ(ijk, leaf->offsetToGlobalCoord(offset)); + const uint64_t count = acc.voxelPoints(ijk, start, stop); + EXPECT_TRUE(start); + EXPECT_TRUE(stop); + EXPECT_LT(start, stop); + EXPECT_LE(count, maxPointsPerVoxel); + } + }); + + //timer.stop(); +}// Sphere_CudaPointsToGrid_PointID + +TEST(TestNanoVDBCUDA, NanoGrid_Rgba8) +{ + using BuildT = nanovdb::Rgba8; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 1; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + + nanovdb::CudaPointsToGrid converter; + auto handle = converter.getHandle(d_coords, num_points); + cudaCheck(cudaFree(d_coords)); + + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + + GridT *grid = handle.grid();// no grid on the CPU + EXPECT_FALSE(grid); + handle.deviceDownload();// creates a copy up the CPU + EXPECT_TRUE(handle.deviceData()); + 
EXPECT_TRUE(handle.data()); + auto *data = handle.gridData(); + EXPECT_TRUE(data); + grid = handle.grid(); + EXPECT_TRUE(grid); +}// NanoGrid_Rgba8 + +TEST(TestNanoVDBCUDA, cudaAddBlindData) +{ + using BuildT = float; + using GridT = nanovdb::NanoGrid; + const size_t num_points = 2; + nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3), nanovdb::Coord(10,20,8)}, *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); + cudaCheck(cudaFree(d_coords)); + EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU + const uint64_t size = sizeof(GridT) + + sizeof(GridT::TreeType) + + GridT::RootType::memUsage(1) + + sizeof(GridT::UpperNodeType) + + sizeof(GridT::LowerNodeType) + + 2*sizeof(GridT::LeafNodeType); + EXPECT_EQ(handle.size(), size); + GridT *d_grid = handle.deviceGrid();// no grid on the CPU + EXPECT_TRUE(d_grid); + float *d_blind = nullptr, blind[num_points] = {1.2f, 3.0f}; + cudaCheck(cudaMalloc(&d_blind, num_points * sizeof(float))); + cudaCheck(cudaMemcpy(d_blind, blind, num_points * sizeof(float), cudaMemcpyHostToDevice));// CPU -> GPU + + nanovdb::GpuTimer timer("cudaAddBlindData"); + auto handle2 = nanovdb::cudaAddBlindData(d_grid, d_blind, num_points); + cudaCheck(cudaFree(d_blind)); + timer.stop(); + EXPECT_TRUE(handle2.deviceData());// grid only exists on the GPU + EXPECT_FALSE(handle2.data());// no grid was yet allocated on the CPU + EXPECT_EQ(handle2.size(), handle.size() + sizeof(nanovdb::GridBlindMetaData) + nanovdb::AlignUp(num_points*sizeof(float))); + + auto *grid2 = handle2.grid();// no grid on the CPU + EXPECT_FALSE(grid2); + handle2.deviceDownload();// creates a copy on the CPU + EXPECT_TRUE(handle2.deviceData()); + 
EXPECT_TRUE(handle2.data()); + auto *data = handle2.gridData(); + EXPECT_TRUE(data); + grid2 = handle2.grid(); + EXPECT_TRUE(grid2); + EXPECT_EQ(nanovdb::Vec3d(1.0), grid2->voxelSize()); + EXPECT_EQ(1u, grid2->blindDataCount()); + const auto &bd2 = grid2->blindMetaData(0); + EXPECT_EQ(num_points, bd2.mValueCount); + EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, bd2.mSemantic); + EXPECT_EQ(nanovdb::GridBlindDataClass::Unknown, bd2.mDataClass); + EXPECT_EQ(nanovdb::GridType::Float, bd2.mDataType); + EXPECT_STREQ("", bd2.mName); + const float *dataPtr = bd2.getBlindData(); + EXPECT_TRUE(dataPtr); + for (size_t i=0; i();// no grid on the CPU + EXPECT_TRUE(d_grid2); + + nanovdb::Vec3f *d_blind2 = nullptr, blind2[num_points] = {nanovdb::Vec3f(1.2f), nanovdb::Vec3f(3.0f)}; + cudaCheck(cudaMalloc(&d_blind2, num_points * sizeof(nanovdb::Vec3f))); + cudaCheck(cudaMemcpy(d_blind2, blind2, num_points * sizeof(nanovdb::Vec3f), cudaMemcpyHostToDevice));// CPU -> GPU + + auto handle3 = nanovdb::cudaAddBlindData(d_grid2, d_blind2, num_points, + nanovdb::GridBlindDataClass::AttributeArray, + nanovdb::GridBlindDataSemantic::PointPosition, + "this is a test"); + cudaCheck(cudaFree(d_blind2)); + handle3.deviceDownload();// creates a copy on the CPU + GridT *grid3 = handle3.grid();// no grid on the CPU + EXPECT_TRUE(grid3); + EXPECT_EQ(2, grid3->blindDataCount()); + + const auto &bd3 = grid3->blindMetaData(0); + EXPECT_EQ(num_points, bd3.mValueCount); + EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, bd3.mSemantic); + EXPECT_EQ(nanovdb::GridBlindDataClass::Unknown, bd3.mDataClass); + EXPECT_EQ(nanovdb::GridType::Float, bd3.mDataType); + EXPECT_STREQ("", bd3.mName); + dataPtr = grid3->getBlindData(0); + EXPECT_TRUE(dataPtr); + for (size_t i=0; iblindMetaData(1); + EXPECT_EQ(num_points, bd4.mValueCount); + EXPECT_EQ(nanovdb::GridBlindDataSemantic::PointPosition, bd4.mSemantic); + EXPECT_EQ(nanovdb::GridBlindDataClass::AttributeArray, bd4.mDataClass); + 
EXPECT_EQ(nanovdb::GridType::Vec3f, bd4.mDataType); + EXPECT_STREQ("this is a test", bd4.mName); + auto *dataPtr2 = grid3->getBlindData(1); + EXPECT_TRUE(dataPtr2); + for (size_t i=0; i #include -#include +#include #include #include #include +#include #include -#include -#include #include #include #include +#include +#include #if !defined(_MSC_VER) // does not compile in msvc c++ due to zero-sized arrays. #include @@ -39,31 +40,6 @@ #include -namespace nanovdb {// this namespace is required by gtest -inline std::ostream& -operator<<(std::ostream& os, const CoordBBox& b) -{ - os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " - << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; - return os; -} - -inline std::ostream& -operator<<(std::ostream& os, const Coord& ijk) -{ - os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; - return os; -} - -template -inline std::ostream& -operator<<(std::ostream& os, const Vec3& v) -{ - os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; - return os; -} -}// namespace nanovdb - // define the environment variable VDB_DATA_PATH to use models from the web // e.g. 
setenv VDB_DATA_PATH /home/kmu/dev/data/vdb // or export VDB_DATA_PATH=/Users/ken/dev/data/vdb @@ -214,7 +190,7 @@ TEST_F(TestOpenVDB, getExtrema) { using wBBoxT = openvdb::math::BBox; auto srcGrid = this->getSrcGrid(false, 0, 3);// level set of a bunny if available, else an octahedron - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); @@ -256,11 +232,9 @@ TEST_F(TestOpenVDB, Basic) } } -TEST_F(TestOpenVDB, OpenToNanoType) +TEST_F(TestOpenVDB, MapToNano) { {// Coord - constexpr bool test = std::is_same::Type>::value; - EXPECT_TRUE(test); const openvdb::Coord ijk1(1, 2, -4); nanovdb::Coord ijk2(-2, 7, 9); EXPECT_NE(ijk2, nanovdb::Coord(1, 2, -4)); @@ -268,9 +242,9 @@ TEST_F(TestOpenVDB, OpenToNanoType) EXPECT_EQ(ijk2, nanovdb::Coord(1, 2, -4)); } {// Vec3f - constexpr bool test1 = std::is_same::Type>::value; + constexpr bool test1 = nanovdb::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = std::is_same::Type>::value; + constexpr bool test2 = nanovdb::is_same::type>::value; EXPECT_FALSE(test2); const openvdb::Vec3f xyz1(1, 2, -4); nanovdb::Vec3f xyz2(-2, 7, 9); @@ -279,9 +253,9 @@ TEST_F(TestOpenVDB, OpenToNanoType) EXPECT_EQ(xyz2, nanovdb::Vec3f(1, 2, -4)); } {// Vec4d - constexpr bool test1 = std::is_same::Type>::value; + constexpr bool test1 = nanovdb::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = std::is_same::Type>::value; + constexpr bool test2 = nanovdb::is_same::type>::value; EXPECT_FALSE(test2); const openvdb::Vec4d xyz1(1, 2, -4, 7); nanovdb::Vec4d xyz2(-2, 7, 9, -4); @@ -290,9 +264,9 @@ TEST_F(TestOpenVDB, OpenToNanoType) EXPECT_EQ(xyz2, nanovdb::Vec4d(1, 2, -4, 7)); } {// MaskValue - constexpr bool test1 = std::is_same::Type>::value; + constexpr bool test1 = nanovdb::is_same::type>::value; EXPECT_TRUE(test1); - constexpr bool test2 = 
std::is_same::Type>::value; + constexpr bool test2 = nanovdb::is_same::type>::value; EXPECT_FALSE(test2); EXPECT_EQ(sizeof(nanovdb::ValueMask), sizeof(openvdb::ValueMask)); } @@ -416,13 +390,18 @@ TEST_F(TestOpenVDB, BasicGrid) { // init Grid auto* data = grid->data(); { - openvdb::math::UniformScaleTranslateMap map(2.0, openvdb::Vec3R(0.0, 0.0, 0.0)); - auto affineMap = map.getAffineMap(); - data->mVoxelSize = affineMap->voxelSize(); + openvdb::math::UniformScaleTranslateMap map(2.0, openvdb::Vec3d(0.0, 0.0, 0.0)); + auto affineMap = map.getAffineMap(); const auto mat = affineMap->getMat4(), invMat = mat.inverse(); //for (int i=0; i<4; ++i) std::cout << "Row("<init({nanovdb::GridFlags::HasMinMax}, bytes[8], dstMap, nanovdb::GridType::Float); +#else data->mMap.set(mat, invMat, 1.0); + data->mVoxelSize = affineMap->voxelSize(); data->setFlagsOff(); data->setMinMaxOn(); data->mGridIndex = 0; @@ -433,12 +412,13 @@ TEST_F(TestOpenVDB, BasicGrid) data->mGridType = nanovdb::GridType::Float; data->mMagic = NANOVDB_MAGIC_NUMBER; data->mVersion = nanovdb::Version(); +#endif memcpy(data->mGridName, name.c_str(), name.size() + 1); } EXPECT_EQ(tree, &grid->tree()); - const openvdb::Vec3R p1(1.0, 2.0, 3.0); + const openvdb::Vec3d p1(1.0, 2.0, 3.0); const auto p2 = grid->worldToIndex(p1); - EXPECT_EQ(openvdb::Vec3R(0.5, 1.0, 1.5), p2); + EXPECT_EQ(openvdb::Vec3d(0.5, 1.0, 1.5), p2); const auto p3 = grid->indexToWorld(p2); EXPECT_EQ(p1, p3); { @@ -452,7 +432,7 @@ TEST_F(TestOpenVDB, BasicGrid) } auto const p4 = grid->worldToIndex(p3); - EXPECT_EQ(openvdb::Vec3R(0.0, 0.0, 0.0), p4); + EXPECT_EQ(openvdb::Vec3d(0.0, 0.0, 0.0), p4); const auto p5 = grid->indexToWorld(p4); EXPECT_EQ(p1, p5); } @@ -578,7 +558,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Empty) { // empty grid openvdb::FloatGrid srcGrid(0.0f); auto srcAcc = srcGrid.getAccessor(); - auto handle = nanovdb::openToNanoVDB(srcGrid); + auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); auto* meta = 
handle.gridMetaData(); EXPECT_TRUE(meta); @@ -613,7 +593,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Basic1) srcAcc.setValue(openvdb::Coord(1, 2, 3), 1.0f); EXPECT_TRUE(srcAcc.isValueOn(openvdb::Coord(1, 2, 3))); EXPECT_EQ(1.0f, srcAcc.getValue(openvdb::Coord(1, 2, 3))); - auto handle = nanovdb::openToNanoVDB(srcGrid, nanovdb::StatsMode::All); + auto handle = nanovdb::createNanoGrid(srcGrid, nanovdb::StatsMode::All); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -633,7 +613,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Basic1) EXPECT_EQ(sizeof(nanovdb::NanoGrid) + sizeof(nanovdb::NanoTree) + (const char*)handle.data(), (const char*)&dstGrid->tree().root()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); EXPECT_EQ(1.0f, dstGrid->tree().getValue(nanovdb::Coord(1, 2, 3))); auto dstAcc = dstGrid->getAccessor(); EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(1, 2, 3))); @@ -652,8 +632,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Model) { auto srcGrid = this->getSrcGrid(false); //mTimer.start("Generating NanoVDB grid"); - //auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::Default, nanovdb::ChecksumMode::Default, 2); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.start("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/test.nvdb", handle, this->getCodec()); //mTimer.stop(); @@ -700,10 +679,10 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp4) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(srcGrid); //converter.setVerbose(); converter.setStats(nanovdb::StatsMode::All); - auto handle = converter(srcGrid); + auto handle = converter.getHandle();// (srcGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -731,7 +710,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp4) 
EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); auto *leaf = dstGrid->tree().root().probeLeaf(nanovdb::Coord(1, 2, 3)); EXPECT_TRUE(leaf); //std::cerr << leaf->origin() << ", " << leaf->data()->mBBoxMin << std::endl; @@ -754,10 +733,10 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp4) {// Model auto openGrid = this->getSrcGrid(false); const float tolerance = 0.5f*openGrid->voxelSize()[0]; - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(*openGrid); converter.enableDithering(); //converter.setVerbose(2); - auto handle = converter(*openGrid); + auto handle = converter.getHandle(); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); @@ -795,9 +774,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(srcGrid); converter.setStats(nanovdb::StatsMode::All); - auto handle = converter(srcGrid); + auto handle = converter.getHandle(); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -812,7 +791,6 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ("", std::string(dstGrid->gridName())); - EXPECT_EQ((const char*)handle.data(), (const char*)dstGrid); EXPECT_EQ(1.0f, dstGrid->tree().root().minimum()); EXPECT_EQ(3.0f, dstGrid->tree().root().maximum()); EXPECT_EQ(2.0f, dstGrid->tree().root().average()); @@ -825,7 +803,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); EXPECT_EQ(1.0f, dstGrid->tree().getValue(nanovdb::Coord(1, 2, 3))); auto dstAcc = dstGrid->getAccessor(); 
EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(1, 2, 3))); @@ -837,9 +815,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp8) } {// Model auto openGrid = this->getSrcGrid(false); - const float tolerance = 0.05f*openGrid->voxelSize()[0]; - nanovdb::OpenToNanoVDB converter; - auto handle = converter(*openGrid); + const float tolerance = 0.05f*openGrid->voxelSize()[0]; + nanovdb::CreateNanoGrid converter(*openGrid); + auto handle = converter.getHandle(); converter.enableDithering(); //converter.setVerbose(2); auto* nanoGrid = handle.grid(); @@ -880,10 +858,10 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp16) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(srcGrid); //converter.setVerbose(2); converter.setStats(nanovdb::StatsMode::All); - auto handle = converter(srcGrid); + auto handle = converter.getHandle(); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -911,7 +889,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp16) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_TRUE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); EXPECT_EQ(1.0f, dstGrid->tree().getValue(nanovdb::Coord(1, 2, 3))); auto dstAcc = dstGrid->getAccessor(); EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(1, 2, 3))); @@ -924,9 +902,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_Fp16) {// Model auto openGrid = this->getSrcGrid(false); const float tolerance = 0.005f*openGrid->voxelSize()[0]; - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(*openGrid); converter.enableDithering(); - auto handle = converter(*openGrid); + auto handle = converter.getHandle(); //converter.setVerbose(2); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); @@ -966,9 +944,9 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) EXPECT_EQ(2.0f, srcAcc.getValue(openvdb::Coord(-10, 20,-50))); EXPECT_EQ(3.0f, 
srcAcc.getValue(openvdb::Coord( 50,-12, 30))); - nanovdb::OpenToNanoVDB converter; + nanovdb::CreateNanoGrid converter(srcGrid); converter.setStats(nanovdb::StatsMode::All); - auto handle = converter(srcGrid); + auto handle = converter.getHandle(); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -996,7 +974,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) EXPECT_TRUE(dstGrid->isSequential<1>()); EXPECT_FALSE(dstGrid->isSequential<0>()); - EXPECT_EQ(nanovdb::Vec3R(1.0), dstGrid->voxelSize()); + EXPECT_EQ(nanovdb::Vec3d(1.0), dstGrid->voxelSize()); EXPECT_EQ(1.0f, dstGrid->tree().getValue(nanovdb::Coord(1, 2, 3))); auto dstAcc = dstGrid->getAccessor(); EXPECT_TRUE(dstAcc.isActive(nanovdb::Coord(1, 2, 3))); @@ -1012,14 +990,15 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) #else auto openGrid = this->getSrcGrid(true, 1, 1);// FOG volume of Disney cloud or cube #endif - nanovdb::OpenToNanoVDB converter; - converter.oracle() = nanovdb::AbsDiff( 0.05f ); + nanovdb::CreateNanoGrid converter(*openGrid); //converter.setVerbose(2); - auto handle = converter(*openGrid); + + const float tolerance = 0.05f; + nanovdb::AbsDiff oracle(tolerance); + + auto handle = converter.getHandle(oracle); auto* nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); - const nanovdb::AbsDiff oracle = converter.oracle(); - const float tolerance = oracle.getTolerance(); nanovdb::io::writeGrid("data/test_fpN.nvdb", handle, this->getCodec()); @@ -1047,7 +1026,7 @@ TEST_F(TestOpenVDB, OpenToNanoVDB_FpN) // Generate random points by uniformly distributing points // on a unit-sphere. 
-inline void genPoints(const int numPoints, std::vector& points) +inline void genPoints(const int numPoints, std::vector& points) { openvdb::math::Random01 randNumber(0); const int n = int(std::sqrt(double(numPoints))); @@ -1055,7 +1034,7 @@ inline void genPoints(const int numPoints, std::vector& points) const double yScale = openvdb::math::pi() / double(n); double x, y, theta, phi; - openvdb::Vec3R pos; + openvdb::Vec3d pos; points.reserve(n * n); @@ -1082,10 +1061,10 @@ inline void genPoints(const int numPoints, std::vector& points) } // genPoints class PointList { - std::vector const* const mPoints; + std::vector const* const mPoints; public: - using PosType = openvdb::Vec3R; + using PosType = openvdb::Vec3d; PointList(const std::vector& points) : mPoints(&points) { @@ -1094,13 +1073,14 @@ class PointList void getPos(size_t n, PosType& xyz) const { xyz = (*mPoints)[n]; } }; // PointList +// make testOpenVDB && ./unittest/testOpenVDB --gtest_filter="*PointIndexGrid" --gtest_break_on_failure TEST_F(TestOpenVDB, PointIndexGrid) { const uint64_t pointCount = 40000; const float voxelSize = 0.01f; const auto transform = openvdb::math::Transform::createLinearTransform(voxelSize); - std::vector points; + std::vector points; genPoints(pointCount, points); PointList pointList(points); EXPECT_EQ(pointCount, points.size()); @@ -1109,16 +1089,16 @@ TEST_F(TestOpenVDB, PointIndexGrid) auto srcGrid = openvdb::tools::createPointIndexGrid(pointList, *transform); using MgrT = openvdb::tree::LeafManager; - MgrT leafs(srcGrid->tree()); + MgrT leafMgr(srcGrid->tree()); size_t count = 0; - for (size_t n = 0, N = leafs.leafCount(); n < N; ++n) { - count += leafs.leaf(n).indices().size(); + for (size_t n = 0, N = leafMgr.leafCount(); n < N; ++n) { + count += leafMgr.leaf(n).indices().size(); } EXPECT_EQ(pointCount, count); //mTimer.start("Generating NanoVDB grid from PointIndexGrid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All, 
nanovdb::ChecksumMode::Full); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Full); //mTimer.stop(); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); @@ -1128,9 +1108,9 @@ TEST_F(TestOpenVDB, PointIndexGrid) auto dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ(1u, dstGrid->blindDataCount()); - auto metaData = dstGrid->blindMetaData(0); - EXPECT_EQ(pointCount, metaData.mElementCount); - EXPECT_EQ(nanovdb::GridBlindDataSemantic::Unknown, metaData.mSemantic); + const auto &metaData = dstGrid->blindMetaData(0); + EXPECT_EQ(pointCount, metaData.mValueCount); + EXPECT_EQ(nanovdb::GridBlindDataSemantic::PointId, metaData.mSemantic); EXPECT_EQ(nanovdb::GridBlindDataClass::IndexArray, metaData.mDataClass); EXPECT_EQ(nanovdb::GridType::UInt32, metaData.mDataType); @@ -1147,12 +1127,14 @@ TEST_F(TestOpenVDB, PointIndexGrid) tbb::parallel_for(srcGrid->evalActiveVoxelBoundingBox(), kernel1); //mTimer.stop(); - EXPECT_EQ(pointCount, dstGrid->blindMetaData(0).mElementCount); + EXPECT_EQ(pointCount, dstGrid->blindMetaData(0).mValueCount); + //std::cerr << "" auto kernel = [&](const MgrT::LeafRange& r) { using CoordT = const nanovdb::Coord; auto dstAcc = dstGrid->getAccessor(); nanovdb::PointAccessor pointAcc(*dstGrid); + EXPECT_TRUE(pointAcc); const uint32_t * begin2 = nullptr, *end2 = nullptr; EXPECT_EQ(pointCount, pointAcc.gridPoints(begin2, end2)); for (auto leaf = r.begin(); leaf; ++leaf) { @@ -1167,14 +1149,13 @@ TEST_F(TestOpenVDB, PointIndexGrid) EXPECT_TRUE(leaf->getIndices(ijk, begin1, end1)); EXPECT_TRUE(pointAcc.voxelPoints(*abc, begin2, end2)); EXPECT_EQ(end1 - begin1, end2 - begin2); - for (auto* i = begin1; i != end1; ++i) - EXPECT_EQ(*i, *begin2++); + for (auto* i = begin1; i != end1; ++i) EXPECT_EQ(*i, *begin2++); } } }; //mTimer.start("Parallel unit test"); - tbb::parallel_for(leafs.leafRange(), kernel); + tbb::parallel_for(leafMgr.leafRange(), kernel); //mTimer.stop(); //mTimer.start("Testing 
bounding box"); @@ -1195,17 +1176,17 @@ TEST_F(TestOpenVDB, PointIndexGrid) TEST_F(TestOpenVDB, PointDataGridBasic) { // Create a vector with three point positions. - std::vector positions; - positions.push_back(openvdb::Vec3R(0.0, 0.0, 0.0)); - positions.push_back(openvdb::Vec3R(0.0, 0.0, 1.0)); - positions.push_back(openvdb::Vec3R(1.34, -56.1, 5.7)); + std::vector positions; + positions.push_back(openvdb::Vec3d(0.0, 0.0, 0.0)); + positions.push_back(openvdb::Vec3d(0.0, 0.0, 1.0)); + positions.push_back(openvdb::Vec3d(1.34, -56.1, 5.7)); EXPECT_EQ( 3UL, positions.size() ); // We need to define a custom search lambda function // to account for floating-point roundoffs! auto search = [&positions](const openvdb::Vec3f &p) { for (auto it = positions.begin(); it != positions.end(); ++it) { - const openvdb::Vec3R delta = *it - p; + const openvdb::Vec3d delta = *it - p; if ( delta.length() < 1e-5 ) return it; } return positions.end(); @@ -1216,7 +1197,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) // wrapper around an stl vector wrapper here, however it is also possible to // write one for a custom data structure in order to match the interface // required. - openvdb::points::PointAttributeVector positionsWrapper(positions); + openvdb::points::PointAttributeVector positionsWrapper(positions); // This method computes a voxel-size to match the number of // points / voxel requested. Although it won't be exact, it typically offers // a good balance of memory against performance. 
@@ -1229,7 +1210,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) srcGrid->setName("PointDataGrid"); //mTimer.start("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); @@ -1237,13 +1218,22 @@ TEST_F(TestOpenVDB, PointDataGridBasic) EXPECT_TRUE(meta); EXPECT_EQ(nanovdb::GridType::UInt32, meta->gridType()); EXPECT_EQ(nanovdb::GridClass::PointData, meta->gridClass()); - auto dstGrid = handle.grid(); + + auto *dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); for (int i=0; i<3; ++i) { EXPECT_EQ(srcGrid->voxelSize()[i], dstGrid->voxelSize()[i]); } + EXPECT_EQ(1u, dstGrid->blindDataCount());// only point positions + auto &metaData = dstGrid->blindMetaData(0u); + EXPECT_EQ(metaData.mValueCount, positions.size()); + EXPECT_EQ(strcmp("P", metaData.mName), 0); + EXPECT_EQ(metaData.mDataClass, nanovdb::GridBlindDataClass::AttributeArray); + EXPECT_EQ(metaData.mSemantic, nanovdb::GridBlindDataSemantic::PointPosition); + EXPECT_EQ(metaData.mDataType, nanovdb::GridType::Vec3f); nanovdb::PointAccessor acc(*dstGrid); + EXPECT_TRUE(acc); const nanovdb::Vec3f *begin = nullptr, *end = nullptr; // iterators over points in a given voxel EXPECT_EQ(positions.size(), openvdb::points::pointCount(srcGrid->tree())); EXPECT_EQ(acc.gridPoints(begin, end), positions.size()); @@ -1268,7 +1258,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) const nanovdb::Vec3f vxlDst = *begin++;// local voxel coordinates for (int i=0; i<3; ++i) { EXPECT_EQ( ijkSrc[i], ijkDst[i] ); - EXPECT_EQ( vxlSrc[i], vxlDst[i] ); + //EXPECT_EQ( vxlSrc[i], vxlDst[i] ); } // A PointDataGrid encodes local voxel coordinates // so transform those to global index coordinates! 
@@ -1285,7 +1275,7 @@ TEST_F(TestOpenVDB, PointDataGridBasic) EXPECT_EQ( wldSrc[i], wldDst[i] ); } - // compair to original input points + // compare to original input points auto it = search( wldSrc ); EXPECT_TRUE( it != positions.end() ); positions.erase( it ); @@ -1296,18 +1286,18 @@ TEST_F(TestOpenVDB, PointDataGridBasic) TEST_F(TestOpenVDB, PointDataGridRandom) { - std::vector positions; + std::vector positions; const size_t pointCount = 2000; - const openvdb::Vec3R wldMin(-234.3, -135.6, -503.7); - const openvdb::Vec3R wldMax( 57.8, 289.1, 0.2); - const openvdb::Vec3R wldDim = wldMax - wldMin; + const openvdb::Vec3d wldMin(-234.3, -135.6, -503.7); + const openvdb::Vec3d wldMax( 57.8, 289.1, 0.2); + const openvdb::Vec3d wldDim = wldMax - wldMin; openvdb::math::Random01 randNumber(0); // We need to define a custom search lambda function // to account for floating-point roundoffs! auto search = [&positions](const openvdb::Vec3f &p) { for (auto it = positions.begin(); it != positions.end(); ++it) { - const openvdb::Vec3R delta = *it - p; + const openvdb::Vec3d delta = *it - p; if ( delta.length() < 1e-3 ) return it; } return positions.end(); @@ -1315,8 +1305,8 @@ TEST_F(TestOpenVDB, PointDataGridRandom) // Create a vector with random point positions. for (size_t i=0; i positionsWrapper(positions); + openvdb::points::PointAttributeVector positionsWrapper(positions); // This method computes a voxel-size to match the number of // points / voxel requested. Although it won't be exact, it typically offers // a good balance of memory against performance. 
@@ -1340,7 +1330,7 @@ TEST_F(TestOpenVDB, PointDataGridRandom) srcGrid->setName("PointDataGrid"); //mTimer.start("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); @@ -1355,6 +1345,7 @@ TEST_F(TestOpenVDB, PointDataGridRandom) } nanovdb::PointAccessor acc(*dstGrid); + EXPECT_TRUE(acc); const nanovdb::Vec3f *begin = nullptr, *end = nullptr; // iterators over points in a given voxel EXPECT_EQ(positions.size(), openvdb::points::pointCount(srcGrid->tree())); EXPECT_EQ(acc.gridPoints(begin, end), positions.size()); @@ -1438,7 +1429,7 @@ TEST_F(TestOpenVDB, CNanoVDB) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1469,7 +1460,7 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinear) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1511,7 +1502,7 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinearStencil) { auto srcGrid = this->getSrcGrid(); //mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -1550,15 +1541,15 @@ TEST_F(TestOpenVDB, CNanoVDBTrilinearStencil) #endif -TEST_F(TestOpenVDB, NanoToOpenVDB_GridBuilder) -{// test GridBuilder -> NanoVDB -> OpenVDB - nanovdb::GridBuilder builder(0.0f, nanovdb::GridClass::LevelSet); - auto buildAcc = builder.getAccessor(); +TEST_F(TestOpenVDB, NanoToOpenVDB_BuildGrid) +{// test build::Grid -> NanoVDB -> OpenVDB + nanovdb::build::Grid buildGrid(0.0f, "test", 
nanovdb::GridClass::LevelSet); + auto buildAcc = buildGrid.getAccessor(); buildAcc.setValue(nanovdb::Coord(1, 2, 3), 1.0f); buildAcc.setValue(nanovdb::Coord(2, -2, 9), 2.0f); EXPECT_EQ(1.0f, buildAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(2.0f, buildAcc.getValue(nanovdb::Coord(2, -2, 9))); - auto handle = builder.getHandle<>(1.0, nanovdb::Vec3d(0.0), "test"); + auto handle = nanovdb::createNanoGrid(buildGrid); EXPECT_TRUE(handle); auto* meta = handle.gridMetaData(); EXPECT_TRUE(meta); @@ -1691,13 +1682,13 @@ TEST_F(TestOpenVDB, MultiFile) grid.setName("Int32 grid"); grid.tree().setValue(openvdb::Coord(-256), 10); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // 2: add an empty int32_t grid openvdb::Int32Grid grid(-4); grid.setName("Int32 grid, empty"); EXPECT_EQ(0u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // 3: add a ValueMask grid openvdb::MaskGrid grid(false); @@ -1711,7 +1702,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.tree().evalActiveVoxelBoundingBox(bbox); //std::cerr << bbox << std::endl; EXPECT_EQ(openvdb::CoordBBox(min, max), bbox); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // 4: add a bool grid openvdb::BoolGrid grid(false); @@ -1720,7 +1711,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(1u, grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord( 10, 450, 90), true); EXPECT_EQ(2u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // 5: add a Vec3f grid openvdb::Vec3fGrid grid(openvdb::Vec3f(0.0f, 0.0f, -1.0f)); @@ -1729,7 +1720,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(0u, grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord(-256), openvdb::Vec3f(1.0f, 0.0f, 0.0f)); EXPECT_EQ(1u, 
grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } { // 6: add a Vec4f grid using OpenVDBVec4fGrid = openvdb::Grid::Type>; @@ -1740,7 +1731,7 @@ TEST_F(TestOpenVDB, MultiFile) EXPECT_EQ(0u, grid.activeVoxelCount()); grid.tree().setValue(openvdb::Coord(-256), openvdb::Vec4f(1.0f, 0.0f, 0.0f, 0.0f)); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); OpenVDBVec4fGrid::unregisterGrid(); } { // 7: add an int64_t grid @@ -1748,7 +1739,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.setName("Int64 grid"); grid.tree().setValue(openvdb::Coord(0), 10); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } for (int i = 0; i < 10; ++i) {// 8 -> 17 const float radius = 100.0f; @@ -1756,7 +1747,7 @@ TEST_F(TestOpenVDB, MultiFile) const openvdb::Vec3f center(i * 10.0f, 0.0f, 0.0f); auto srcGrid = openvdb::tools::createLevelSetSphere(radius, center, voxelSize, width); srcGrid->setName("Level set sphere at (" + std::to_string(i * 10) + ",0,0)"); - handles.push_back(nanovdb::openToNanoVDB(*srcGrid)); + handles.push_back(nanovdb::createNanoGrid(*srcGrid)); } { // 18: add a double grid openvdb::DoubleGrid grid(0.0); @@ -1764,7 +1755,7 @@ TEST_F(TestOpenVDB, MultiFile) grid.setGridClass(openvdb::GRID_FOG_VOLUME); grid.tree().setValue(openvdb::Coord(6000), 1.0); EXPECT_EQ(1u, grid.activeVoxelCount()); - handles.push_back(nanovdb::openToNanoVDB(grid)); + handles.push_back(nanovdb::createNanoGrid(grid)); } nanovdb::io::writeGrids("data/multi.nvdb", handles, this->getCodec()); @@ -2062,7 +2053,12 @@ TEST_F(TestOpenVDB, LongGridName) srcGrid.tree().setValue(openvdb::Coord(-256), 10.0f); EXPECT_EQ(1u, srcGrid.activeVoxelCount()); const bool isLong = length > limit; - auto handle = nanovdb::openToNanoVDB(srcGrid); +#if 1 + auto handle = 
nanovdb::createNanoGrid(srcGrid); +#else + nanovdb::CreateNanoGrid converter(srcGrid); + auto handle = converter.getHandle(); +#endif auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ(1u, dstGrid->activeVoxelCount()); @@ -2090,7 +2086,7 @@ TEST_F(TestOpenVDB, LevelSetFiles) std::vector foundModels; std::ofstream os("data/ls.nvdb", std::ios::out | std::ios::binary); for (const auto& fileName : fileNames) { - //mTimer.start("Reading grid from the file \"" + fileName + "\""); + //mTimer.start("\nReading grid from the file \"" + fileName + "\""); try { openvdb::io::File file(fileName); file.open(false); //disable delayed loading @@ -2100,8 +2096,8 @@ TEST_F(TestOpenVDB, LevelSetFiles) foundModels.push_back(fileName.substr(pos, fileName.size() - pos - 4 )); //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); - //auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::Disable, nanovdb::ChecksumMode::Disable, false, 1); + //auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Disable); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2178,8 +2174,7 @@ TEST_F(TestOpenVDB, FogFiles) foundModels.push_back(fileName.substr(pos, fileName.size() - pos - 4 )); //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); - //auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::Disable, nanovdb::ChecksumMode::Disable, false, 1); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Partial); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2254,7 +2249,7 @@ 
TEST_F(TestOpenVDB, PointFiles) EXPECT_TRUE(positionIndex != openvdb::points::AttributeSet::INVALID_POS); //mTimer.restart("Generating NanoVDB grid from PointDataGrid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid(os, handle, this->getCodec()); @@ -2268,6 +2263,7 @@ TEST_F(TestOpenVDB, PointFiles) EXPECT_TRUE(dstGrid); nanovdb::PointAccessor acc(*dstGrid); + EXPECT_TRUE(acc); const nanovdb::Vec3f * begin = nullptr, *end = nullptr; // iterators over points in a given voxel EXPECT_EQ(acc.gridPoints(begin, end), openvdb::points::pointCount(srcGrid->tree())); //std::cerr << "Point count = " << acc.gridPoints(begin, end) << ", attribute count = " << attributeSet.size() << std::endl; @@ -2319,7 +2315,7 @@ TEST_F(TestOpenVDB, PointFiles) TEST_F(TestOpenVDB, Trilinear) { // create a grid so sample from - auto trilinear = [](const openvdb::Vec3R& xyz) -> float { + auto trilinear = [](const openvdb::Vec3d& xyz) -> float { return 0.34 + 1.6 * xyz[0] + 6.7 * xyz[1] - 3.5 * xyz[2]; // world coordinates }; @@ -2334,7 +2330,7 @@ TEST_F(TestOpenVDB, Trilinear) acc.setValue(ijk, trilinear(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2351,7 +2347,7 @@ TEST_F(TestOpenVDB, Trilinear) EXPECT_FALSE(handles[0].grid()); EXPECT_EQ(voxelSize, dstGrid->voxelSize()[0]); - const openvdb::Vec3R ijk(13.4, 24.67, 5.23); // in index space + const openvdb::Vec3d ijk(13.4, 24.67, 5.23); // in index space const float exact = trilinear(srcGrid->indexToWorld(ijk)); const float approx = trilinear(srcGrid->indexToWorld(openvdb::Coord(13, 25, 5))); //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; @@ -2384,7 
+2380,7 @@ TEST_F(TestOpenVDB, Trilinear) TEST_F(TestOpenVDB, Triquadratic) { // create a grid so sample from - auto triquadratic = [](const openvdb::Vec3R& xyz) -> double { + auto triquadratic = [](const openvdb::Vec3d& xyz) -> double { return 0.34 + 1.6 * xyz[0] + 2.7 * xyz[1] + 1.5 * xyz[2] + 0.025 * xyz[0] * xyz[1] * xyz[2] - 0.013 * xyz[0] * xyz[0]; // world coordinates }; @@ -2400,7 +2396,7 @@ TEST_F(TestOpenVDB, Triquadratic) acc.setValue(ijk, triquadratic(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2416,7 +2412,7 @@ TEST_F(TestOpenVDB, Triquadratic) auto* dstGrid = handles[0].grid(); EXPECT_TRUE(dstGrid); - const openvdb::Vec3R ijk(3.4, 4.67, 5.23); // in index space + const openvdb::Vec3d ijk(3.4, 4.67, 5.23); // in index space const float exact = triquadratic(srcGrid->indexToWorld(ijk)); const float approx = triquadratic(srcGrid->indexToWorld(openvdb::Coord(3, 5, 5))); //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; @@ -2444,7 +2440,7 @@ TEST_F(TestOpenVDB, Triquadratic) TEST_F(TestOpenVDB, Tricubic) { // create a grid so sample from - auto tricubic = [](const openvdb::Vec3R& xyz) -> double { + auto tricubic = [](const openvdb::Vec3d& xyz) -> double { return 0.34 + 1.6 * xyz[0] + 2.7 * xyz[1] + 1.5 * xyz[2] + 0.025 * xyz[0] * xyz[1] * xyz[2] - 0.013 * xyz[0] * xyz[0] * xyz[0]; // world coordinates }; @@ -2459,7 +2455,7 @@ TEST_F(TestOpenVDB, Tricubic) acc.setValue(ijk, tricubic(srcGrid->indexToWorld(ijk))); } //mTimer.restart("Generating NanoVDB grid"); - auto handle = nanovdb::openToNanoVDB(*srcGrid); + auto handle = nanovdb::createNanoGrid(*srcGrid); //mTimer.restart("Writing NanoVDB grid"); nanovdb::io::writeGrid("data/tmp.nvdb", handle); //mTimer.stop(); @@ -2475,7 
+2471,7 @@ TEST_F(TestOpenVDB, Tricubic) auto* dstGrid = handles[0].grid(); EXPECT_TRUE(dstGrid); - const openvdb::Vec3R ijk(3.4, 4.67, 5.23); // in index space + const openvdb::Vec3d ijk(3.4, 4.67, 5.23); // in index space const float exact = tricubic(srcGrid->indexToWorld(ijk)); const float approx = tricubic(srcGrid->indexToWorld(openvdb::Coord(3, 5, 5))); //std::cerr << "Trilinear: exact = " << exact << ", approx = " << approx << std::endl; @@ -2503,7 +2499,7 @@ TEST_F(TestOpenVDB, Tricubic) TEST_F(TestOpenVDB, GridValidator) { auto srcGrid = this->getSrcGrid(); - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Full, 0); + auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::All, nanovdb::ChecksumMode::Full); //mTimer.stop(); EXPECT_TRUE(handle); EXPECT_TRUE(handle.data()); @@ -2572,17 +2568,18 @@ TEST_F(TestOpenVDB, DenseIndexGrid) // read openvdb::FloatGrid auto srcGrid = this->getSrcGrid(false, 0, 0);// level set of a dragon if available, else an octahedron auto& srcTree = srcGrid->tree(); - + nanovdb::CreateNanoGrid builder(*srcGrid); + builder.setStats(nanovdb::StatsMode::All); // openvdb::FloatGrid -> nanovdb::FloatGrid - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All); + auto handle = builder.getHandle(); EXPECT_TRUE(handle); auto* fltGrid = handle.grid(); + builder.setStats();// reset //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; - // nanovdb::FloatGrid -> nanovdb::IndexGrid - nanovdb::IndexGridBuilder builder(*fltGrid, true, true); + // openvdb::FloatGrid -> nanovdb::IndexGrid //mTimer.start("Create IndexGrid"); - auto handle2 = builder.getHandle(); + auto handle2 = builder.getHandle(1u, true, true); //mTimer.stop(); auto *idxGrid = handle2.grid(); auto idxAcc = idxGrid->getAccessor(); @@ -2593,8 +2590,8 @@ TEST_F(TestOpenVDB, DenseIndexGrid) // create external value buffer //mTimer.start("Create value buffer"); - auto buffer 
= builder.getValues(1);// only allocate one channel - const float *values = reinterpret_cast(buffer.data()); + const float *values = idxGrid->getBlindData(0); + EXPECT_TRUE(values); //mTimer.stop(); //std::cerr << "Value buffer footprint: " << (buffer.size()>>20) << "MB" << std::endl; @@ -2612,9 +2609,11 @@ TEST_F(TestOpenVDB, DenseIndexGrid) auto fltAcc = fltGrid->getAccessor();// NOT thread-safe! for (auto i=r.begin(); i!=r.end(); ++i){ auto *idxLeaf = idxLeaf0 + i; - auto *srcLeaf = fltAcc.probeLeaf(idxLeaf->origin()); - EXPECT_TRUE(srcLeaf); - EXPECT_EQ(values[idxLeaf->minimum()], srcLeaf->minimum()); + auto *fltLeaf = fltAcc.probeLeaf(idxLeaf->origin()); + EXPECT_TRUE(fltLeaf); + // since idxGrid was created from an OpenVDB Grid stats were not available + EXPECT_EQ(values[idxLeaf->minimum()], srcGrid->tree().root().background()); + //EXPECT_EQ(values[idxLeaf->minimum()], fltLeaf->minimum());// only if idxGrid was created from fltGrid for (auto vox = idxLeaf->beginValueOn(); vox; ++vox) { EXPECT_EQ(values[*vox], fltAcc.getValue(vox.getCoord())); } @@ -2626,34 +2625,22 @@ TEST_F(TestOpenVDB, SparseIndexGrid) { // read openvdb::FloatGrid auto srcGrid = this->getSrcGrid(false, 0, 0);// level set of a dragon if available, else an octahedron - //auto& srcTree = srcGrid->tree(); - // openvdb::FloatGrid -> nanovdb::FloatGrid - auto handle = nanovdb::openToNanoVDB(*srcGrid, nanovdb::StatsMode::All); - EXPECT_TRUE(handle); - auto* fltGrid = handle.grid(); - //std::cerr << "FloatGrid footprint: " << (fltGrid->gridSize()>>20) << "MB" << std::endl; - - // nanovdb::FloatGrid -> nanovdb::IndexGrid - nanovdb::IndexGridBuilder builder(*fltGrid, false, false); + // openvdb::FloatGrid -> nanovdb::IndexGrid + nanovdb::CreateNanoGrid builder(*srcGrid); //mTimer.start("Create IndexGrid"); - auto handle2 = builder.getHandle(); + auto handle2 = builder.getHandle(1u, false, false); //mTimer.stop(); auto *idxGrid = handle2.grid(); auto idxAcc = idxGrid->getAccessor(); 
EXPECT_TRUE(idxGrid); - const uint64_t vCount = idxGrid->data()->mData1; + const uint64_t vCount = idxGrid->valueCount(); //std::cerr << "IndexGrid value count = " << vCount << std::endl; //std::cerr << "IndexGrid footprint: " << (idxGrid->gridSize()>>20) << "MB" << std::endl; - // create external value buffer - //mTimer.start("Create value buffer"); - auto buffer = builder.getValues(1);// only allocate one channel - const float *values = reinterpret_cast(buffer.data()); - //mTimer.stop(); - //std::cerr << "Value buffer footprint: " << (buffer.size()>>20) << "MB" << std::endl; - // unit-test sparse value buffer + const float *values = idxGrid->getBlindData(0u); + EXPECT_TRUE(values); //mTimer.start("Testing sparse active values"); for (auto it = srcGrid->tree().cbeginValueOn(); it; ++it) { const openvdb::Coord ijk = it.getCoord(); @@ -2665,6 +2652,116 @@ TEST_F(TestOpenVDB, SparseIndexGrid) }// SparseIndexGrid +TEST_F(TestOpenVDB, BuildNodeManager) +{ + {// test NodeManager with build::Grid + using GridT = nanovdb::build::Grid; + GridT grid(0.0f); + nanovdb::build::NodeManager mgr(grid); + using TreeT = GridT::TreeType; + static const bool test = nanovdb::is_same::type, TreeT::LeafNodeType>::value; + EXPECT_TRUE(test); + } + {// test NodeManager with openvdb::Grid + using GridT = openvdb::FloatGrid; + GridT grid(0.0f); + nanovdb::build::NodeManager mgr(grid); + using TreeT = GridT::TreeType; + static const bool test = nanovdb::is_same::type, TreeT::LeafNodeType>::value; + EXPECT_TRUE(test); + } + {// test NodeTrait on nanovdb::Grid + using GridT = nanovdb::NanoGrid; + using TreeT = GridT::TreeType; + static const bool test = nanovdb::is_same::type, TreeT::LeafNodeType>::value; + EXPECT_TRUE(test); + } +}// BuildNodeManager + +#if 1 + +class NanoPointList +{ + size_t mSize; + const openvdb::Vec3f *mPoints; +public: + using PosType = openvdb::Vec3f; + using value_type = openvdb::Vec3f; + NanoPointList(const nanovdb::Vec3f *points, size_t size) : mSize(size), 
mPoints(reinterpret_cast(points)) {} + size_t size() const {return mSize;} + void getPos(size_t n, PosType& xyz) const {xyz = mPoints[n];} +}; // NanoPointList + +// make -j && ./unittest/testNanoVDB --gtest_filter="*CudaPointsToGrid_PointID" --gtest_repeat=3 && ./unittest/testOpenVDB --gtest_filter="*PointIndexGrid*" --gtest_repeat=3 +TEST_F(TestOpenVDB, Benchmark_OpenVDB_PointIndexGrid) +{ + const double voxelSize = 0.5; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), voxelSize); + timer.stop(); + + auto *pointGrid = pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const nanovdb::Vec3f *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + // construct data structure + timer.start("Building openvdb::PointIndexGrid on CPU from "+std::to_string(pointCount)+" points"); + using PointIndexGrid = openvdb::tools::PointIndexGrid; + const openvdb::math::Transform::Ptr transform = openvdb::math::Transform::createLinearTransform(voxelSize); + NanoPointList pointList(begin, pointCount); + auto pointGridPtr = openvdb::tools::createPointIndexGrid(pointList, *transform); + timer.stop(); + openvdb::CoordBBox bbox; + pointGridPtr->tree().evalActiveVoxelBoundingBox(bbox); + std::cerr << "openvdb::bbox = " << bbox << " voxel count = " << pointGridPtr->tree().activeVoxelCount() << std::endl; + +}// Benchmark_OpenVDB_PointIndexGrid + +TEST_F(TestOpenVDB, Benchmark_OpenVDB_PointDataGrid) +{ + const double voxelSize = 0.5; + + nanovdb::CpuTimer timer("Generate sphere with points"); + auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), voxelSize); + timer.stop(); + + auto *pointGrid = 
pointsHandle.grid(); + EXPECT_TRUE(pointGrid); + std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + + nanovdb::PointAccessor acc2(*pointGrid); + EXPECT_TRUE(acc2); + const nanovdb::Vec3f *begin, *end; + const size_t pointCount = acc2.gridPoints(begin, end); + EXPECT_TRUE(begin); + EXPECT_TRUE(end); + EXPECT_LT(begin, end); + + // construct data structure + timer.start("Building openvdb::PointDataGrid on CPU from "+std::to_string(pointCount)+" points"); + using PointIndexGrid = openvdb::tools::PointIndexGrid; + const auto transform = openvdb::math::Transform::createLinearTransform(voxelSize); + NanoPointList pointList(begin, pointCount); + auto pointIndexGridPtr = openvdb::tools::createPointIndexGrid(pointList, *transform); + auto pointDataGridPtr = openvdb::points::createPointDataGrid,// corresponds to PointType::Voxel8 + openvdb::points::PointDataGrid, NanoPointList, PointIndexGrid>(*pointIndexGridPtr, pointList, *transform); + timer.stop(); + openvdb::CoordBBox bbox; + pointDataGridPtr->tree().evalActiveVoxelBoundingBox(bbox); + std::cerr << "openvdb::bbox = " << bbox << " voxel count = " << pointDataGridPtr->tree().activeVoxelCount() << std::endl; + +}// Benchmark_OpenVDB_PointDataGrid +#endif + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/nanovdb/nanovdb/unittest/pnanovdb_validate_strides.h b/nanovdb/nanovdb/unittest/pnanovdb_validate_strides.h index 4fc778d755..02f27c1ea3 100644 --- a/nanovdb/nanovdb/unittest/pnanovdb_validate_strides.h +++ b/nanovdb/nanovdb/unittest/pnanovdb_validate_strides.h @@ -83,7 +83,7 @@ static void compute_node_strides( pnanovdb_uint32_t minmaxStride = pnanovdb_grid_type_minmax_strides_bits[grid_type] / 8u; pnanovdb_uint32_t minmaxAlign = pnanovdb_grid_type_minmax_aligns_bits[grid_type] / 8u; pnanovdb_uint32_t statStride = pnanovdb_grid_type_stat_strides_bits[grid_type] / 8u; - pnanovdb_uint32_t postStatStride = 0u; + 
pnanovdb_uint32_t indexMaskStride = 0u; if (nodeLevel == 0u) { if (pnanovdb_grid_type_leaf_type[grid_type] == PNANOVDB_LEAF_TYPE_LITE) @@ -106,17 +106,33 @@ static void compute_node_strides( minmaxStride = 0u; minmaxAlign = 0u; statStride = 0u; - postStatStride = 8u; tableAlign = 8u; tableFullStride = 8u; } + else if (pnanovdb_grid_type_leaf_type[grid_type] == PNANOVDB_LEAF_TYPE_INDEXMASK) + { + minmaxStride = 0u; + minmaxAlign = 0u; + statStride = 0u; + tableAlign = 8u; + tableFullStride = 8u; + indexMaskStride = 64u; + } + else if (pnanovdb_grid_type_leaf_type[grid_type] == PNANOVDB_LEAF_TYPE_POINTINDEX) + { + minmaxStride = 8u; + minmaxAlign = 8u; + statStride = 0u; + tableAlign = 2u; + tableFullStride = (16u * node_elements[nodeLevel]) / 8u; + } } *min_off = allocate(&offset, minmaxStride, minmaxAlign); *max_off = allocate(&offset, minmaxStride, minmaxAlign); *ave_off = allocate(&offset, statStride, statStride); *stddev_off = allocate(&offset, statStride, statStride); - allocate(&offset, postStatStride, postStatStride); *table_off = allocate(&offset, tableFullStride, tableAlign); + allocate(&offset, indexMaskStride, tableAlign); *total_size = allocate(&offset, 0u, 32u); } diff --git a/nanovdb/nanovdb/util/CpuTimer.h b/nanovdb/nanovdb/util/CpuTimer.h new file mode 100644 index 0000000000..44bf155287 --- /dev/null +++ b/nanovdb/nanovdb/util/CpuTimer.h @@ -0,0 +1,83 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @file CpuTimer.h +/// +/// @author Ken Museth +/// +/// @brief A simple timing class (in case openvdb::util::CpuTimer is unavailable) + +#ifndef NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +class CpuTimer +{ + std::chrono::high_resolution_clock::time_point mStart; +public: + /// @brief Default constructor + CpuTimer() {} + + /// @brief Constructor that starts the timer + /// @param msg string message to be printed when 
timer is started + /// @param os output stream for the message above + CpuTimer(const std::string &msg, std::ostream& os = std::cerr) {this->start(msg, os);} + + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + void start(const std::string &msg, std::ostream& os = std::cerr) + { + os << msg << " ... " << std::flush; + mStart = std::chrono::high_resolution_clock::now(); + } + + /// @brief elapsed time (since start) in milliseconds + template + auto elapsed() + { + auto end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - mStart).count(); + } + + /// @brief stop the timer + /// @tparam AccuracyT Template parameter defining the accuracy of the reported times + /// @param os output stream for the message above + template + void stop(std::ostream& os = std::cerr) + { + auto end = std::chrono::high_resolution_clock::now(); + auto diff = std::chrono::duration_cast(end - mStart).count(); + os << "completed in " << diff; + if (std::is_same::value) {// resolved at compile-time + os << " microseconds" << std::endl; + } else if (std::is_same::value) { + os << " milliseconds" << std::endl; + } else if (std::is_same::value) { + os << " seconds" << std::endl; + } else { + os << " unknown time unit" << std::endl; + } + } + + /// @brief stop and start the timer + /// @tparam AccuracyT Template parameter defining the accuracy of the reported times + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + template + void restart(const std::string &msg, std::ostream& os = std::cerr) + { + this->stop(); + this->start(msg, os); + } + + +};// CpuTimer + +} // namespace nanovdb + +#endif // NANOVDB_CPU_TIMER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h new file mode 100644 index 0000000000..2ca81c72c5 --- /dev/null +++ 
b/nanovdb/nanovdb/util/CreateNanoGrid.h @@ -0,0 +1,2079 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CreateNanoGrid.h + + \author Ken Museth + + \date June 26, 2020 + + \note In the examples below we assume that @c srcGrid is an existing grid of type + SrcGridT = @c openvdb::FloatGrid, @c nanovdb::FloatGrid or @c nanovdb::build::FloatGrid. + + \brief Convert any grid to a nanovdb grid of the same type, e.g. float->float + \code + auto handle = nanovdb::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Convert a grid to a nanovdb grid of a different type, e.g. float->half + \code + auto handle = nanovdb::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Convert a grid to a nanovdb grid of the same type but using a CUDA buffer + \code + auto handle = nanovdb::createNanoGrid(srcGrid); + auto *dstGrid = handle.grid(); + \endcode + + \brief Create a nanovdb grid that indexes values in an existing source grid of any type. + If DstBuildT = nanovdb::ValueIndex both active and inactive values are indexed + and if DstBuildT = nanovdb::ValueOnIndex only active values are indexed. 
+ \code + using DstBuildT = nanovdb::ValueIndex;// index both active and inactive values + auto handle = nanovdb::createNanoGrid<SrcGridT,DstBuildT>(srcGrid,0,false,false);//no blind data, tile values or stats + auto *dstGrid = handle.grid(); + \endcode + + \brief Create a NanoVDB grid from scratch + \code +#if defined(NANOVDB_USE_OPENVDB) + using SrcGridT = openvdb::FloatGrid; +#else + using SrcGridT = nanovdb::build::FloatGrid; +#endif + SrcGridT srcGrid(0.0f);// create an empty source grid + auto srcAcc = srcGrid.getAccessor();// create an accessor + srcAcc.setValue(nanovdb::Coord(1,2,3), 1.0f);// set a voxel value + + auto handle = nanovdb::createNanoGrid(srcGrid);// convert source grid to a grid handle + auto dstGrid = handle.grid();// get a pointer to the destination grid + \endcode + + \brief Convert a base-pointer to an openvdb grid, denoted srcGrid, to a nanovdb + grid of the same type, e.g. float -> float or openvdb::Vec3f -> nanovdb::Vec3f + \code + auto handle = nanovdb::openToNanoVDB(*srcGrid);// convert source grid to a grid handle + auto dstGrid = handle.grid();// get a pointer to the destination grid + \endcode + + \brief Converts any existing grid to a NanoVDB grid, for example: + nanovdb::build::Grid -> nanovdb::Grid + nanovdb::Grid -> nanovdb::Grid + nanovdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + openvdb::Grid -> nanovdb::Grid + + \note This file replaces GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h +*/ + +#ifndef NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED +#define NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED + +#if defined(NANOVDB_USE_OPENVDB) +#include +#include +#include +#endif + +#include "GridBuilder.h" +#include "NodeManager.h" +#include "GridHandle.h" +#include "GridStats.h" +#include "GridChecksum.h" +#include "Range.h" +#include "Invoke.h" +#include "ForEach.h" +#include "Reduce.h" +#include "PrefixSum.h" +#include "DitherLUT.h"// for 
nanovdb::DitherLUT + +#include +#include // for stringstream +#include +#include +#include // for memcpy +#include + +namespace nanovdb { + +// Forward declarations (defined below) +template class CreateNanoGrid; +class AbsDiff; +template struct MapToNano; + +//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) +/// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param base Shared pointer to a base openvdb grid to be converted +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @return Handle to the destination NanoGrid +template +GridHandle +openToNanoVDB(const openvdb::GridBase::Ptr& base, + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + int verbose = 0); +#endif + +//================================================================================================ + +/// @brief Freestanding function that creates a NanoGrid from any source grid +/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT Type of values in the output (destination) nanovdb Grid, e.g. 
float or nanovdb::Fp16 +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename BufferT = HostBuffer> +typename disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function that creates a NanoGrid<ValueIndex> or NanoGrid<ValueOnIndex> from any source grid +/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT If ValueIndex all (active and inactive) values are indexed and if +/// it is ValueOnIndex only active values are indexed. +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param channels If non-zero the values (active or all) in @c srcGrid are encoded as blind +/// data in the output index grid. @c channels indicates the number of copies +/// of these blind data +/// @param includeStats If true all tree nodes will include indices for stats, i.e.
min/max/avg/std-dev +/// @param includeTiles If false only values in leaf nodes are indexed +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid<T> where T = ValueIndex or ValueOnIndex +template::type, + typename BufferT = HostBuffer> +typename enable_if::is_index, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + uint32_t channels = 0u, + bool includeStats = true, + bool includeTiles = true, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function to create a NanoGrid<FpN> from any source grid +/// @tparam SrcGridT Type of the input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT = FpN, i.e. variable bit-width of the output grid +/// @tparam OracleT Type of the oracle used to determine the local bit-width, i.e. N in FpN +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param ditherOn switch to enable or disable dithering of quantization error +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param oracle Instance of an oracle used to determine the local bit-width, i.e.
N in FpN +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename OracleT = AbsDiff, + typename BufferT = HostBuffer> +typename enable_if::value, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + bool ditherOn = false, + int verbose = 0, + const OracleT &oracle = OracleT(), + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Freestanding function to create a NanoGrid from any source grid, X=4,8,16 +/// @tparam SrcGridT Type of in input (source) grid, e.g. openvdb::Grid or nanovdb::Grid +/// @tparam DstBuildT = Fp4, Fp8 or Fp16, i.e. quantization bit-width of the output grid +/// @tparam BufferT Type of the buffer used to allocate the destination grid +/// @param srcGrid Input (source) grid to be converted +/// @param ditherOn switch to enable or disable dithering of quantization error +/// @param sMode Mode for computing statistics of the destination grid +/// @param cMode Mode for computing checksums of the destination grid +/// @param verbose Mode of verbosity +/// @param buffer Instance of a buffer used for allocation +/// @return Handle to the destination NanoGrid +template::type, + typename BufferT = HostBuffer> +typename enable_if::is_FpX, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + bool ditherOn = false, + int verbose = 0, + const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Compression oracle based on absolute difference +class AbsDiff +{ + float mTolerance;// absolute error tolerance +public: + /// @note The default value of -1 means it's un-initialized! 
+ AbsDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} + AbsDiff(const AbsDiff&) = default; + ~AbsDiff() = default; + operator bool() const {return mTolerance>=0.0f;} + void init(nanovdb::GridClass gClass, float background) { + if (gClass == GridClass::LevelSet) { + static const float halfWidth = 3.0f; + mTolerance = 0.1f * background / halfWidth;// range of ls: [-3dx; 3dx] + } else if (gClass == GridClass::FogVolume) { + mTolerance = 0.01f;// range of FOG volumes: [0;1] + } else { + mTolerance = 0.0f; + } + } + void setTolerance(float tolerance) { mTolerance = tolerance; } + float getTolerance() const { return mTolerance; } + /// @brief Return true if the approximate value is within the accepted + /// absolute error bounds of the exact value. + /// + /// @details Required member method + bool operator()(float exact, float approx) const + { + return Abs(exact - approx) <= mTolerance; + } +};// AbsDiff + +inline std::ostream& operator<<(std::ostream& os, const AbsDiff& diff) +{ + os << "Absolute tolerance: " << diff.getTolerance(); + return os; +} + +//================================================================================================ + +/// @brief Compression oracle based on relative difference +class RelDiff +{ + float mTolerance;// relative error tolerance +public: + /// @note The default value of -1 means it's un-initialized! + RelDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} + RelDiff(const RelDiff&) = default; + ~RelDiff() = default; + operator bool() const {return mTolerance>=0.0f;} + void setTolerance(float tolerance) { mTolerance = tolerance; } + float getTolerance() const { return mTolerance; } + /// @brief Return true if the approximate value is within the accepted + /// relative error bounds of the exact value. 
+ /// + /// @details Required member method. The test is written without a division so that two values that are both zero (an exact match) compare as 0 <= 0 instead of producing 0/0 = NaN, which would always be rejected + bool operator()(float exact, float approx) const + { + return Abs(exact - approx) <= mTolerance * Max(Abs(exact), Abs(approx));// equivalent to |exact-approx|/max(|exact|,|approx|) <= tolerance whenever the denominator is non-zero + } +};// RelDiff + +inline std::ostream& operator<<(std::ostream& os, const RelDiff& diff) +{ + os << "Relative tolerance: " << diff.getTolerance(); + return os; +} + +//================================================================================================ + +/// @brief The NodeAccessor provides a uniform API for accessing nodes of NanoVDB, OpenVDB and build Grids +/// +/// @note General implementation that works with nanovdb::build::Grid +template +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = false; + static constexpr bool IS_NANOVDB = false; + using BuildType = typename GridT::BuildType; + using ValueType = typename GridT::ValueType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using RootType = typename TreeType::RootNodeType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridT &grid) : mMgr(const_cast(grid)) {} + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + const std::string& getName() const {return this->grid().getName();}; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return this->grid().map();} + GridClass gridClass() const {return this->grid().gridClass();} +private: + build::NodeManager mMgr; +};// NodeAccessor + +//================================================================================================ + +/// @brief Template specialization for nanovdb::Grid which is special since its NodeManager +/// uses a handle in order to support
node access on the GPU! +template +class NodeAccessor< NanoGrid > +{ +public: + static constexpr bool IS_OPENVDB = false; + static constexpr bool IS_NANOVDB = true; + using BuildType = BuildT; + using BufferType = HostBuffer; + using GridType = NanoGrid; + using ValueType = typename GridType::ValueType; + using TreeType = typename GridType::TreeType; + using RootType = typename TreeType::RootType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) + : mHandle(createNodeManager(grid)) + , mMgr(*(mHandle.template mgr())) {} + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const {return std::string(this->grid().gridName());}; + bool hasLongGridName() const {return this->grid().hasLongGridName();} + const nanovdb::Map& map() const {return this->grid().map();} + GridClass gridClass() const {return this->grid().gridClass();} +private: + NodeManagerHandle mHandle; + const NodeManager &mMgr; +};// NodeAccessor + +//================================================================================================ + +/// @brief Trait that maps any type to the corresponding nanovdb type +/// @tparam T Type to be mapped +template +struct MapToNano { using type = T; }; + +#if defined(NANOVDB_USE_OPENVDB) + +template<> +struct MapToNano {using type = nanovdb::ValueMask;}; +template +struct MapToNano>{using type = nanovdb::Vec3;}; +template +struct MapToNano>{using type = nanovdb::Vec4;}; +template<> +struct MapToNano {using type = uint32_t;}; +template<> +struct MapToNano {using type = uint32_t;}; + +/// Templated Grid with default 32->16->8 configuration +template +using OpenLeaf = openvdb::tree::LeafNode; +template +using OpenLower = 
openvdb::tree::InternalNode,4>; +template +using OpenUpper = openvdb::tree::InternalNode,5>; +template +using OpenRoot = openvdb::tree::RootNode>; +template +using OpenTree = openvdb::tree::Tree>; +template +using OpenGrid = openvdb::Grid>; + +//================================================================================================ + +/// @brief Template specialization for openvdb::Grid +template +class NodeAccessor> +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = BuildT; + using GridType = OpenGrid; + using ValueType = typename GridType::ValueType; + using TreeType = OpenTree; + using RootType = OpenRoot; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const { + switch (this->grid().getGridClass()) { + case openvdb::GRID_LEVEL_SET: + if (!is_floating_point::value) OPENVDB_THROW(openvdb::ValueError, "processGrid: Level sets are expected to be floating point types"); + return GridClass::LevelSet; + case openvdb::GRID_FOG_VOLUME: + return GridClass::FogVolume; + case openvdb::GRID_STAGGERED: + return GridClass::Staggered; + default: + return GridClass::Unknown; + } + } +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor> + 
+//================================================================================================ + +/// @brief Template specialization for openvdb::tools::PointIndexGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointIndex32; + using GridType = openvdb::tools::PointIndexGrid; + using TreeType = openvdb::tools::PointIndexTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointIndex;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +//================================================================================================ + +// @brief Template specialization for openvdb::points::PointDataGrid +template <> +class NodeAccessor +{ +public: + static constexpr bool IS_OPENVDB = true; + static constexpr bool IS_NANOVDB = false; + using BuildType = openvdb::PointDataIndex32; + using GridType = openvdb::points::PointDataGrid; + using TreeType = openvdb::points::PointDataTree; + using RootType = typename TreeType::RootNodeType; + using ValueType = typename 
GridType::ValueType; + template + using NodeType = typename NodeTrait::type; + NodeAccessor(const GridType &grid) : mMgr(const_cast(grid)) { + const auto mat4 = this->grid().transform().baseMap()->getAffineMap()->getMat4(); + mMap.set(mat4, mat4.inverse()); + } + const GridType& grid() const {return mMgr.grid();} + const TreeType& tree() const {return mMgr.tree();} + const RootType& root() const {return mMgr.root();} + uint64_t nodeCount(int level) const { return mMgr.nodeCount(level); } + template + const NodeType& node(uint32_t i) const {return mMgr.template node(i); } + std::string getName() const { return this->grid().getName(); }; + bool hasLongGridName() const {return this->grid().getName().length() >= GridData::MaxNameSize;} + const nanovdb::Map& map() const {return mMap;} + GridClass gridClass() const {return GridClass::PointData;} +private: + build::NodeManager mMgr; + nanovdb::Map mMap; +};// NodeAccessor + +#endif// NANOVDB_USE_OPENVDB + +//================================================================================================ + +/// @brief Creates any nanovdb Grid from any source grid (certain combinations are obviously not allowed) +template +class CreateNanoGrid +{ +public: + // SrcGridT can be either openvdb::Grid, nanovdb::Grid or nanovdb::build::Grid + using SrcNodeAccT = NodeAccessor; + using SrcBuildT = typename SrcNodeAccT::BuildType; + using SrcValueT = typename SrcNodeAccT::ValueType; + using SrcTreeT = typename SrcNodeAccT::TreeType; + using SrcRootT = typename SrcNodeAccT::RootType; + template + using SrcNodeT = typename NodeTrait::type; + + /// @brief Constructor from a source grid + /// @param srcGrid Source grid of type SrcGridT + CreateNanoGrid(const SrcGridT &srcGrid); + + /// @brief Constructor from a source node accessor (defined above) + /// @param srcNodeAcc Source node accessor of type SrcNodeAccT + CreateNanoGrid(const SrcNodeAccT &srcNodeAcc); + + /// @brief Set the level of verbosity + /// @param mode level of 
verbosity, mode=0 means quiet + void setVerbose(int mode = 1) { mVerbose = mode; } + + /// @brief Enable or disable dithering, i.e. randomization of the quantization error. + /// @param on enable or disable dithering + /// @warning Dithering only has an affect when DstBuildT = {Fp4, Fp8, Fp16, FpN} + void enableDithering(bool on = true) { mDitherOn = on; } + + /// @brief Set the mode used for computing statistics of the destination grid + /// @param mode specify the mode of statistics + void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } + + /// @brief Set the mode used for computing checksums of the destination grid + /// @param mode specify the mode of checksum + void setChecksum(ChecksumMode mode = ChecksumMode::Default) { mChecksum = mode; } + + /// @brief Converts the source grid into a nanovdb grid with the specified destination build type + /// @tparam DstBuildT build type of the destination, output, grid + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version is when DstBuildT != {FpN, ValueIndex, ValueOnIndex} + template::type, typename BufferT = HostBuffer> + typename disable_if::value || + BuildTraits::is_index, GridHandle>::type + getHandle(const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with variable bit quantization + /// @tparam DstBuildT FpN, i.e. 
the destination grid uses variable bit quantization + /// @tparam OracleT Type of oracle used to determine the N in FpN + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param oracle Instance of the oracle used to determine the N in FpN + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + /// @note This version assumes DstBuildT == FpN + template::type, typename OracleT = AbsDiff, typename BufferT = HostBuffer> + typename enable_if::value, GridHandle>::type + getHandle(const OracleT &oracle = OracleT(), + const BufferT &buffer = BufferT()); + + /// @brief Converts the source grid into a nanovdb grid with indices to external arrays of values + /// @tparam DstBuildT ValueIndex or ValueOnIndex, i.e. index all or just active values + /// @tparam BufferT Type of the buffer used for allocating the destination grid + /// @param channels Number of copies of values encoded as blind data in the destination grid + /// @param includeStats Specify if statics should be indexed + /// @param includeTiles Specify if tile values, i.e. 
non-leaf-node-values, should be indexed + /// @param buffer instance of the buffer use for allocation + /// @return Return an instance of a GridHandle (invoking move semantics) + template::type, typename BufferT = HostBuffer> + typename enable_if::is_index, GridHandle>::type + getHandle(uint32_t channels = 0u, + bool includeStats = true, + bool includeTiles = true, + const BufferT &buffer = BufferT()); + + /// @brief Add blind data to the destination grid + /// @param name String name of the blind data + /// @param dataSemantic Semantics of the blind data + /// @param dataClass Class of the blind data + /// @param dataType Type of the blind data + /// @param count Element count of the blind data + /// @param size Size of each element of the blind data + /// @return Return the index used to access the blind data + uint64_t addBlindData(const std::string& name, + GridBlindDataSemantic dataSemantic, + GridBlindDataClass dataClass, + GridType dataType, + size_t count, size_t size) + { + const size_t order = mBlindMetaData.size(); + mBlindMetaData.emplace(name, dataSemantic, dataClass, dataType, order, count, size); + return order; + } + + /// @brief This method only has affect when getHandle was called with DstBuildT = ValueIndex or ValueOnIndex + /// @return Return the number of indexed values. If called before getHandle was called with + /// DstBuildT = ValueIndex or ValueOnIndex the return value is zero. Else it is a value larger than zero. + uint64_t valueCount() const {return mValIdx[0].empty() ? 0u : mValIdx[0].back();} + + /// @brief Copy values from the source grid into a provided buffer + /// @tparam DstBuildT Must be ValueIndex or ValueOnIndex, i.e. 
a index grid + /// @param buffer point in which to write values + template + typename enable_if::is_index>::type + copyValues(SrcValueT *buffer); + +private: + + // ========================================================= + + template + typename enable_if::value&&LEVEL==0), typename NodeTrait, LEVEL>::type*>::type + dstNode(uint64_t i) const { + static_assert(LEVEL==0 || LEVEL==1 || LEVEL==2, "Expected LEVEL== {0,1,2}"); + using NodeT = typename NodeTrait, LEVEL>::type; + return PtrAdd(mBufferPtr, mOffset[5-LEVEL]) + i; + } + template + typename enable_if::value && LEVEL==0, NanoLeaf*>::type + dstNode(uint64_t i) const {return PtrAdd>(mBufferPtr, mCodec[i].offset);} + + template NanoRoot* dstRoot() const {return PtrAdd>(mBufferPtr, mOffset.root);} + template NanoTree* dstTree() const {return PtrAdd>(mBufferPtr, mOffset.tree);} + template NanoGrid* dstGrid() const {return PtrAdd>(mBufferPtr, mOffset.grid);} + GridBlindMetaData* dstMeta(uint32_t i) const { return PtrAdd(mBufferPtr, mOffset.meta) + i;}; + + // ========================================================= + + template + typename disable_if::value || BuildTraits::is_index>::type + preProcess(); + + template + typename enable_if::is_index>::type + preProcess(uint32_t channels); + + template + typename enable_if::value>::type + preProcess(OracleT oracle); + + // ========================================================= + + // Below are private methods use to serialize nodes into NanoVDB + template + GridHandle initHandle(const BufferT& buffer); + + // ========================================================= + + template + inline typename enable_if::is_index>::type + postProcess(uint32_t channels); + + template + inline typename disable_if::is_index>::type + postProcess(); + + // ======================================================== + + template + typename disable_if::is_special>::type + processLeafs(); + + template + typename enable_if::is_index>::type + processLeafs(); + + template + typename 
enable_if::is_FpX>::type + processLeafs(); + + template + typename enable_if::value>::type + processLeafs(); + + template + typename enable_if::value>::type + processLeafs(); + + template + typename enable_if::value>::type + processLeafs(); + + // ========================================================= + + template + typename enable_if::is_index>::type + processInternalNodes(); + + template + typename enable_if::is_index>::type + processInternalNodes(); + + // ========================================================= + + template + typename enable_if::is_index>::type + processRoot(); + + template + typename enable_if::is_index>::type + processRoot(); + + // ========================================================= + + template + void processTree(); + + template + void processGrid(); + + template + typename enable_if::is_index, uint64_t>::type + countTileValues(uint64_t valueCount); + + template + typename enable_if::is_index, uint64_t>::type + countValues(); + +#if defined(NANOVDB_USE_OPENVDB) + template + typename disable_if::value || + is_same::value, uint64_t>::type + countPoints() const; + + template + typename enable_if::value || + is_same::value, uint64_t>::type + countPoints() const; + + template + typename enable_if::value>::type + copyPointAttribute(size_t attIdx, AttT *attPtr); +#else + uint64_t countPoints() const {return 0u;} +#endif + + uint8_t* mBufferPtr;// pointer to the beginning of the destination nanovdb grid buffer + struct BufferOffsets { + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size; + uint64_t operator[](int i) const { return *(reinterpret_cast(this)+i); } + } mOffset; + int mVerbose; + uint64_t mLeafNodeSize;// non-trivial when DstBuiltT = FpN + + std::unique_ptr mSrcNodeAccPtr;// placeholder for potential local instance + const SrcNodeAccT &mSrcNodeAcc; + struct BlindMetaData; // forward declaration + std::set mBlindMetaData; // sorted according to BlindMetaData.order + struct Codec { float min, max; uint64_t offset; 
uint8_t log2; };// used for adaptive bit-rate quantization + std::unique_ptr mCodec;// defines a codec per leaf node when DstBuildT = FpN + StatsMode mStats; + ChecksumMode mChecksum; + bool mDitherOn, mIncludeStats, mIncludeTiles; + std::vector mValIdx[3];// store id of first value in node +}; // CreateNanoGrid + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcGridT &srcGrid) + : mVerbose(0) + , mSrcNodeAccPtr(new SrcNodeAccT(srcGrid)) + , mSrcNodeAcc(*mSrcNodeAccPtr) + , mStats(StatsMode::Default) + , mChecksum(ChecksumMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +CreateNanoGrid::CreateNanoGrid(const SrcNodeAccT &srcNodeAcc) + : mVerbose(0) + , mSrcNodeAccPtr(nullptr) + , mSrcNodeAcc(srcNodeAcc) + , mStats(StatsMode::Default) + , mChecksum(ChecksumMode::Default) + , mDitherOn(false) + , mIncludeStats(true) + , mIncludeTiles(true) +{ +} + +//================================================================================================ + +template +struct CreateNanoGrid::BlindMetaData +{ + BlindMetaData(const std::string& name, + const std::string& type, + GridBlindDataClass dataClass, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(AlignUp(valueCount * valueSize)) + { + std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = BlindMetaData::mapToSemantics(name); + metaData->mDataClass = dataClass; + metaData->mDataType = 
BlindMetaData::mapToType(type); + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + BlindMetaData(const std::string& name, + GridBlindDataSemantic dataSemantic, + GridBlindDataClass dataClass, + GridType dataType, + size_t i, size_t valueCount, size_t valueSize) + : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) + , order(i)// sorted id of meta data + , size(AlignUp(valueCount * valueSize)) + { + std::memset(metaData, 0, sizeof(GridBlindMetaData));// zero out all meta data + if (name.length()>=GridData::MaxNameSize) throw std::runtime_error("blind data name exceeds character limit"); + std::memcpy(metaData->mName, name.c_str(), name.length() + 1); + metaData->mValueCount = valueCount; + metaData->mSemantic = dataSemantic; + metaData->mDataClass = dataClass; + metaData->mDataType = dataType; + metaData->mValueSize = valueSize; + NANOVDB_ASSERT(metaData->isValid()); + } + + ~BlindMetaData(){ delete metaData;} + bool operator<(const BlindMetaData& other) const { return order < other.order; } // required by std::set + static GridType mapToType(const std::string& name) + { + GridType type = GridType::Unknown; + if ("uint32_t" == name) { + type = GridType::UInt32; + } else if ("float" == name) { + type = GridType::Float; + } else if ("vec3s"== name) { + type = GridType::Vec3f; + } else if ("int32" == name) { + type = GridType::Int32; + } else if ("int64" == name) { + type = GridType::Int64; + } + return type; + } + static GridBlindDataSemantic mapToSemantics(const std::string& name) + { + GridBlindDataSemantic semantic = GridBlindDataSemantic::Unknown; + if ("P" == name) { + semantic = GridBlindDataSemantic::PointPosition; + } else if ("V" == name) { + semantic = GridBlindDataSemantic::PointVelocity; + } else if ("Cd" == name) { + semantic = GridBlindDataSemantic::PointColor; + } else if ("N" == name) { + semantic = GridBlindDataSemantic::PointNormal; + } else if ("id" == name) { + semantic = GridBlindDataSemantic::PointId; + 
//} else { + //std::cerr << "CreateNanoGrid::mapToSemantics: Unable to map \n" << name << "\" to GridBlindDataSemantic\n"; + } + return semantic; + } + GridBlindMetaData *metaData; + const size_t order, size; +}; // CreateNanoGrid::BlindMetaData + +//================================================================================================ + +template +template +typename disable_if::value || + BuildTraits::is_index, GridHandle>::type +CreateNanoGrid::getHandle(const BufferT& pool) +{ + this->template preProcess(); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename enable_if::value, GridHandle>::type +CreateNanoGrid::getHandle(const OracleT& oracle, const BufferT& pool) +{ + this->template preProcess(oracle); + auto handle = this->template initHandle(pool); + this->template postProcess(); + return handle; +} // CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +typename enable_if::is_index, GridHandle>::type +CreateNanoGrid::getHandle(uint32_t channels, + bool includeStats, + bool includeTiles, + const BufferT &pool) +{ + mIncludeStats = includeStats; + mIncludeTiles = includeTiles; + this->template preProcess(channels); + auto handle = this->template initHandle(pool); + this->template postProcess(channels); + return handle; +}// CreateNanoGrid::getHandle + +//================================================================================================ + +template +template +GridHandle CreateNanoGrid::initHandle(const BufferT& pool) +{ + mOffset.grid = 0;// grid is always stored at the start of the buffer! 
+ mOffset.tree = NanoGrid::memUsage(); // grid ends and tree begins + mOffset.root = mOffset.tree + NanoTree::memUsage(); // tree ends and root node begins + mOffset.upper = mOffset.root + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()); // root node ends and upper internal nodes begin + mOffset.lower = mOffset.upper + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2); // upper internal nodes ends and lower internal nodes begin + mOffset.leaf = mOffset.lower + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); // lower internal nodes ends and leaf nodes begin + mOffset.meta = mOffset.leaf + mLeafNodeSize;// leaf nodes end and blind meta data begins + mOffset.blind = mOffset.meta + sizeof(GridBlindMetaData)*mBlindMetaData.size(); // meta data ends and blind data begins + mOffset.size = mOffset.blind;// end of buffer + for (const auto& b : mBlindMetaData) mOffset.size += b.size; // accumulate all the blind data + + auto buffer = BufferT::create(mOffset.size, &pool); + mBufferPtr = buffer.data(); + + // Concurrent processing of all tree levels! 
+ invoke( [&](){this->template processLeafs();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processInternalNodes();}, + [&](){this->template processRoot();}, + [&](){this->template processTree();}, + [&](){this->template processGrid();} ); + + return GridHandle(std::move(buffer)); +} // CreateNanoGrid::initHandle + +//================================================================================================ + +template +template +inline typename disable_if::value || BuildTraits::is_index>::type +CreateNanoGrid::preProcess() +{ + if (const uint64_t pointCount = this->countPoints()) { +#if defined(NANOVDB_USE_OPENVDB) + if constexpr(is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + this->addBlindData("index", + GridBlindDataSemantic::PointId, + GridBlindDataClass::IndexArray, + GridType::UInt32, + pointCount, + sizeof(uint32_t)); + } else if constexpr(is_same::value) { + if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); + auto &srcLeaf = mSrcNodeAcc.template node<0>(0); + const auto& attributeSet = srcLeaf.attributeSet(); + const auto& descriptor = attributeSet.descriptor(); + const auto& nameMap = descriptor.map(); + for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { + const size_t index = it->second; + auto& attArray = srcLeaf.constAttributeArray(index); + mBlindMetaData.emplace(it->first, // name + descriptor.valueType(index), // type + GridBlindDataClass::AttributeArray, // class + index, // order + pointCount, // element count + attArray.valueTypeSize()); // element size + } + } +#endif// end NANOVDB_USE_OPENVDB + } + if (mSrcNodeAcc.hasLongGridName()) { + this->addBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + //1, mSrcNodeAcc.getName().length() + 1); + } + mLeafNodeSize = 
mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); +}// CreateNanoGrid::preProcess + +//================================================================================================ + +template +template +inline typename enable_if::value>::type +CreateNanoGrid::preProcess(OracleT oracle) +{ + static_assert(is_same::value, "preProcess: expected SrcValueT == float"); + + const size_t leafCount = mSrcNodeAcc.nodeCount(0); + if (leafCount==0) { + mLeafNodeSize = 0u; + return; + } + mCodec.reset(new Codec[leafCount]); + + if constexpr(is_same::value) { + if (!oracle) oracle.init(mSrcNodeAcc.gridClass(), mSrcNodeAcc.root().background()); + } + + DitherLUT lut(mDitherOn); + forEach(0, leafCount, 4, [&](const Range1D &r) { + for (auto i=r.begin(); i!=r.end(); ++i) { + const auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + float &min = mCodec[i].min = std::numeric_limits::max(); + float &max = mCodec[i].max = -min; + for (int j=0; j<512; ++j) { + float v = srcLeaf.getValue(j); + if (vmax) max = v; + } + const float range = max - min; + uint8_t &logBitWidth = mCodec[i].log2 = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits + while (range > 0.0f && logBitWidth < 4u) { + const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u; + const float encode = mask/range; + const float decode = range/mask; + int j = 0; + do { + const float exact = srcLeaf.getValue(j);//data[j];// exact value + const uint32_t code = uint32_t(encode*(exact - min) + lut(j)); + const float approx = code * decode + min;// approximate value + j += oracle(exact, approx) ? 
1 : 513; + } while(j < 512); + if (j == 512) break; + ++logBitWidth; + } + } + }); + + auto getOffset = [&](size_t i){ + --i; + return mCodec[i].offset + NanoLeaf::DataType::memUsage(1u << mCodec[i].log2); + }; + mCodec[0].offset = NanoGrid::memUsage() + + NanoTree::memUsage() + + NanoRoot::memUsage(mSrcNodeAcc.root().getTableSize()) + + NanoUpper::memUsage()*mSrcNodeAcc.nodeCount(2) + + NanoLower::memUsage()*mSrcNodeAcc.nodeCount(1); + for (size_t i=1; iaddBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + //1, mSrcNodeAcc.getName().length() + 1); + } +}// CreateNanoGrid::preProcess + +//================================================================================================ + +template +template +inline typename enable_if::is_index, uint64_t>::type +CreateNanoGrid::countTileValues(uint64_t valueCount) +{ + const uint64_t stats = mIncludeStats ? 4u : 0u;// minimum, maximum, average, and deviation + mValIdx[LEVEL].clear(); + mValIdx[LEVEL].resize(mSrcNodeAcc.nodeCount(LEVEL) + 1, stats);// minimum 1 entry + forEach(1, mValIdx[LEVEL].size(), 8, [&](const Range1D& r){ + for (auto i = r.begin(); i!=r.end(); ++i) { + auto &srcNode = mSrcNodeAcc.template node(i-1); + if constexpr(BuildTraits::is_onindex) {// resolved at compile time + mValIdx[LEVEL][i] += srcNode.getValueMask().countOn(); + } else { + static const uint64_t maxTileCount = uint64_t(1u) << 3*srcNode.LOG2DIM; + mValIdx[LEVEL][i] += maxTileCount - srcNode.getChildMask().countOn(); + } + } + }); + mValIdx[LEVEL][0] = valueCount; + for (size_t i=1; i + +//================================================================================================ + +template +template +inline typename enable_if::is_index, uint64_t>::type +CreateNanoGrid::countValues() +{ + const uint64_t stats = mIncludeStats ? 
4u : 0u;// minimum, maximum, average, and deviation + uint64_t valueCount = 1u;// offset 0 corresponds to the background value + if (mIncludeTiles) { + if constexpr(BuildTraits::is_onindex) { + for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) ++valueCount; + } else { + for (auto it = mSrcNodeAcc.root().cbeginValueAll(); it; ++it) ++valueCount; + } + valueCount += stats;// optionally append stats for the root node + valueCount = countTileValues(valueCount); + valueCount = countTileValues(valueCount); + } + mValIdx[0].clear(); + mValIdx[0].resize(mSrcNodeAcc.nodeCount(0) + 1, 512u + stats);// minimum 1 entry + if constexpr(BuildTraits::is_onindex) { + forEach(1, mValIdx[0].size(), 8, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + mValIdx[0][i] = stats; + mValIdx[0][i] += mSrcNodeAcc.template node<0>(i-1).getValueMask().countOn(); + } + }); + } + mValIdx[0][0] = valueCount; + prefixSum(mValIdx[0], true);// inclusive prefix sum + return mValIdx[0].back(); +}// CreateNanoGrid::countValues() + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::preProcess(uint32_t channels) +{ + const uint64_t valueCount = this->template countValues(); + mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); + + uint32_t order = mBlindMetaData.size(); + for (uint32_t i=0; i()), + GridBlindDataClass::AttributeArray, + order++, + valueCount, + sizeof(SrcValueT)); + } + if (mSrcNodeAcc.hasLongGridName()) { + this->addBlindData("grid name", + GridBlindDataSemantic::Unknown, + GridBlindDataClass::GridName, + GridType::Unknown, + mSrcNodeAcc.getName().length() + 1, 1); + //1, mSrcNodeAcc.getName().length() + 1); + } +}// preProcess + +//================================================================================================ + +template +template +inline typename disable_if::is_special>::type 
+CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + using DstValueT = typename DstDataT::ValueType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); + } else { + dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; + dstData->mFlags = 0u;// enable rendering, no bbox, no stats + dstData->mMinimum = dstData->mMaximum = typename DstDataT::ValueType(); + dstData->mAverage = dstData->mStdDevi = 0; + } + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + DstValueT *dst = dstData->mValues; + if constexpr(is_same::value && SrcNodeAccT::IS_OPENVDB) { + const SrcValueT *src = srcLeaf.buffer().data(); + for (auto *end = dst + 512u; dst != end; dst += 4, src += 4) { + dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. 
loop-unrolling + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + } else { + for (uint32_t j=0; j<512u; ++j) *dst++ = static_cast(srcLeaf.getValue(j)); + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + static_assert(DstDataT::padding()==0u, "Expected leaf nodes to have no padding"); + + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + const uint8_t flags = mIncludeStats ? 16u : 0u;// 4th bit indicates stats + DstDataT *dstData = this->template dstNode(r.begin())->data();// fixed size + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; + dstData->mFlags = flags; + dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + dstData->mOffset = mValIdx[0][i]; + if constexpr(BuildTraits::is_onindex) { + const uint64_t *w = dstData->mValueMask.words(); +#ifdef USE_OLD_VALUE_ON_INDEX + int32_t sum = CountOn(*w++); + uint8_t *p = reinterpret_cast(&dstData->mPrefixSum), *q = p + 7; + for (int j=0; j<7; ++j) { + *p++ = sum & 255u; + *q |= (sum >> 8) << j; + sum += CountOn(*w++); + } +#else + uint64_t &prefixSum = dstData->mPrefixSum, sum = CountOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += CountOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } +#endif + } else { + dstData->mPrefixSum = 0u; + } + if constexpr(BuildTraits::is_indexmask) dstData->mMask = dstData->mValueMask; + } + }); +} // CreateNanoGrid::processLeafs + 
+//================================================================================================ + +template +template +inline typename enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); + } else { + dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; + dstData->mFlags = 0u;// enable rendering, no bbox, no stats + dstData->mPadding[0] = dstData->mPadding[1] = 0u; + } + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); + } else { + dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; + dstData->mFlags = 
0u;// enable rendering, no bbox, no stats + } + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + if constexpr(!is_same::value) { + for (int j=0; j<512; ++j) dstData->mValues.set(j, static_cast(srcLeaf.getValue(j))); + } else if constexpr(SrcNodeAccT::IS_OPENVDB) { + dstData->mValues = *reinterpret_cast*>(srcLeaf.buffer().data()); + } else if constexpr(SrcNodeAccT::IS_NANOVDB) { + dstData->mValues = srcLeaf.data()->mValues; + } else {// build::Leaf + dstData->mValues = srcLeaf.mValues; // copy value mask + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename enable_if::is_FpX>::type +CreateNanoGrid::processLeafs() +{ + using DstDataT = typename NanoLeaf::DataType; + static_assert(DstDataT::FIXED_SIZE, "Expected destination LeafNode to have fixed size"); + using ArrayT = typename DstDataT::ArrayType; + static_assert(is_same::value, "Expected ValueT == float"); + using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double + static constexpr FloatT UNITS = FloatT((1 << DstDataT::bitWidth()) - 1);// # of unique non-zero values + DitherLUT lut(mDitherOn); + + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + if (DstDataT::padding()>0u) { + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + std::memset(reinterpret_cast(dstData), 0, DstDataT::memUsage()); + } else { + dstData->mFlags = dstData->mBBoxDif[2] = dstData->mBBoxDif[1] = dstData->mBBoxDif[0] = 0u; + dstData->mDev = dstData->mAvg = dstData->mMax = dstData->mMin = 0u; + } + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + 
dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + // compute extrema values + float min = std::numeric_limits::max(), max = -min; + for (uint32_t j=0; j<512u; ++j) { + const float v = srcLeaf.getValue(j); + if (v < min) min = v; + if (v > max) max = v; + } + dstData->init(min, max, DstDataT::bitWidth()); + // perform quantization relative to the values in the current leaf node + const FloatT encode = UNITS/(max-min); + uint32_t offset = 0; + auto quantize = [&]()->ArrayT{ + const ArrayT tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *code = reinterpret_cast(dstData->mCode); + if (is_same::value) {// resolved at compile-time + for (uint32_t j=0; j<128u; ++j) { + auto tmp = quantize(); + *code++ = quantize() << 4 | tmp; + tmp = quantize(); + *code++ = quantize() << 4 | tmp; + } + } else { + for (uint32_t j=0; j<128u; ++j) { + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + *code++ = quantize(); + } + } + } + }); +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename enable_if::value>::type +CreateNanoGrid::processLeafs() +{ + static_assert(is_same::value, "Expected SrcValueT == float"); + DitherLUT lut(mDitherOn); + forEach(0, mSrcNodeAcc.nodeCount(0), 8, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto &srcLeaf = mSrcNodeAcc.template node<0>(i); + auto *dstData = this->template dstNode(i)->data(); + dstData->mBBoxMin = srcLeaf.origin(); // copy origin of node + dstData->mBBoxDif[0] = dstData->mBBoxDif[1] = dstData->mBBoxDif[2] = 0u; + const uint8_t logBitWidth = mCodec[i].log2; + dstData->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag + dstData->mValueMask = srcLeaf.getValueMask(); // copy value mask + const float min = mCodec[i].min, max = mCodec[i].max; + dstData->init(min, max, uint8_t(1) 
<< logBitWidth); + // perform quantization relative to the values in the current leaf node + uint32_t offset = 0; + float encode = 0.0f; + auto quantize = [&]()->uint8_t{ + const uint8_t tmp = static_cast(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); + ++offset; + return tmp; + }; + auto *dst = reinterpret_cast(dstData+1); + switch (logBitWidth) { + case 0u: {// 1 bit + encode = 1.0f/(max - min); + for (int j=0; j<64; ++j) { + uint8_t a = 0; + for (int k=0; k<8; ++k) a |= quantize() << k; + *dst++ = a; + } + } + break; + case 1u: {// 2 bits + encode = 3.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + a |= quantize() << 2; + a |= quantize() << 4; + *dst++ = quantize() << 6 | a; + } + } + break; + case 2u: {// 4 bits + encode = 15.0f/(max - min); + for (int j=0; j<128; ++j) { + auto a = quantize(); + *dst++ = quantize() << 4 | a; + a = quantize(); + *dst++ = quantize() << 4 | a; + } + } + break; + case 3u: {// 8 bits + encode = 255.0f/(max - min); + for (int j=0; j<128; ++j) { + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + *dst++ = quantize(); + } + } + break; + default: {// 16 bits - special implementation using higher bit-precision + auto *dst = reinterpret_cast(dstData+1); + const double encode = 65535.0/(max - min);// note that double is required! 
+ for (int j=0; j<128; ++j) { + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + *dst++ = uint16_t(encode * (srcLeaf.getValue(offset) - min) + lut(offset)); ++offset; + } + } + }// end switch + } + });// kernel +} // CreateNanoGrid::processLeafs + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstValueT = typename DstNodeT::ValueType; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstData = this->template dstNode(0)->data(); + for (uint64_t i=0; i(i).getChildMask().countOn(); + } + } + + forEach(0, nodeCount, 4, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstData->mFlags; + if (DstNodeT::DataType::padding()>0u) { + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + std::memset(reinterpret_cast(dstData), 0, DstNodeT::memUsage()); + } else { + dstData->mFlags = 0;// enable rendering, no bbox, no stats + dstData->mMinimum = dstData->mMaximum = typename DstNodeT::ValueType(); + dstData->mAverage = dstData->mStdDevi = 0; + } + dstData->mBBox[0] = srcNode.origin(); // copy origin of node + dstData->mValueMask = srcNode.getValueMask(); // copy value mask + dstData->mChildMask = srcNode.getChildMask(); // copy child mask 
+ for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value{}; // default initialization + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstData->setChild(it.pos(), dstChild); + } else { + dstData->setValue(it.pos(), static_cast(value)); + } + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::processInternalNodes() +{ + using DstNodeT = typename NanoNode::type; + using DstChildT = typename NanoNode::type; + static_assert(LEVEL == 1 || LEVEL == 2, "Expected internal node"); + static_assert(DstNodeT::DataType::padding()==0u, "Expected internal nodes to have no padding"); + + const uint64_t nodeCount = mSrcNodeAcc.nodeCount(LEVEL); + if (nodeCount > 0) {// compute and temporarily encode IDs of child nodes + uint64_t childCount = 0; + auto *dstData = this->template dstNode(0)->data(); + for (uint64_t i=0; i(i).getChildMask().countOn(); + } + } + + forEach(0, nodeCount, 4, [&](const Range1D& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto &srcNode = mSrcNodeAcc.template node(i); + uint64_t childID = dstData->mFlags; + dstData->mFlags = 0u; + dstData->mBBox[0] = srcNode.origin(); // copy origin of node + dstData->mValueMask = srcNode.getValueMask(); // copy value mask + dstData->mChildMask = srcNode.getChildMask(); // copy child mask + uint64_t n = mIncludeTiles ? 
mValIdx[LEVEL][i] : 0u; + for (auto it = srcNode.cbeginChildAll(); it; ++it) { + SrcValueT value; + if (it.probeChild(value)) { + DstChildT *dstChild = this->template dstNode(childID++);// might be Leaf + dstData->setChild(it.pos(), dstChild); + } else { + uint64_t m = 0u; + if (mIncludeTiles && !((BuildTraits::is_onindex) && dstData->mValueMask.isOff(it.pos()))) m = n++; + dstData->setValue(it.pos(), m); + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstData->mMinimum = n++; + dstData->mMaximum = n++; + dstData->mAverage = n++; + dstData->mStdDevi = n++; + } else {// if not tiles or stats set stats to the background offset + dstData->mMinimum = 0u; + dstData->mMaximum = 0u; + dstData->mAverage = 0u; + dstData->mStdDevi = 0u; + } + } + }); +} // CreateNanoGrid::processInternalNodes + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + using DstValueT = typename DstRootT::ValueType; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstData = this->template dstRoot()->data(); + const uint32_t tableSize = srcRoot.getTableSize(); + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + if (DstRootT::DataType::padding()>0) std::memset(reinterpret_cast(dstData), 0, DstRootT::memUsage(tableSize)); + dstData->mTableSize = tableSize; + dstData->mMinimum = dstData->mMaximum = dstData->mBackground = srcRoot.background(); + dstData->mBBox = CoordBBox(); // // set to an empty bounding box + if (tableSize==0) return; + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstData->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT value; + if (it.probeChild(value)) { + dstTile->setChild(it.getCoord(), dstChild++, 
dstData); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), static_cast(value)); + } + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::processRoot() +{ + using DstRootT = NanoRoot; + auto &srcRoot = mSrcNodeAcc.root(); + auto *dstData = this->template dstRoot()->data(); + const uint32_t tableSize = srcRoot.getTableSize(); + // Cast to void* to avoid compiler warning about missing trivial copy-assignment + if (DstRootT::DataType::padding()>0) std::memset(reinterpret_cast(dstData), 0, DstRootT::memUsage(tableSize)); + dstData->mTableSize = tableSize; + dstData->mBackground = 0u; + uint64_t valueCount = 0u;// the first entry is always the background value + dstData->mBBox = CoordBBox(); // set to an empty/invalid bounding box + + if (tableSize>0) { + auto *dstChild = this->template dstNode(0);// fixed size and linear in memory + auto *dstTile = dstData->tile(0);// fixed size and linear in memory + for (auto it = srcRoot.cbeginChildAll(); it; ++it, ++dstTile) { + SrcValueT tmp; + if (it.probeChild(tmp)) { + dstTile->setChild(it.getCoord(), dstChild++, dstData); + } else { + dstTile->setValue(it.getCoord(), it.isValueOn(), 0u); + if (mIncludeTiles && !((BuildTraits::is_onindex) && !dstTile->state)) dstTile->value = ++valueCount; + } + } + } + if (mIncludeTiles && mIncludeStats) {// stats are always placed after the tile values + dstData->mMinimum = ++valueCount; + dstData->mMaximum = ++valueCount; + dstData->mAverage = ++valueCount; + dstData->mStdDevi = ++valueCount; + } else if (dstData->padding()==0) { + dstData->mMinimum = 0u; + dstData->mMaximum = 0u; + dstData->mAverage = 0u; + dstData->mStdDevi = 0u; + } +} // CreateNanoGrid::processRoot + +//================================================================================================ + +template +template +void 
CreateNanoGrid::processTree() +{ + const uint64_t nodeCount[3] = {mSrcNodeAcc.nodeCount(0), mSrcNodeAcc.nodeCount(1), mSrcNodeAcc.nodeCount(2)}; + auto *dstTree = this->template dstTree(); + auto *dstData = dstTree->data(); + dstData->setRoot( this->template dstRoot() ); + + dstData->setFirstNode(nodeCount[2] ? this->template dstNode(0) : nullptr); + dstData->setFirstNode(nodeCount[1] ? this->template dstNode(0) : nullptr); + dstData->setFirstNode(nodeCount[0] ? this->template dstNode(0) : nullptr); + + dstData->mNodeCount[0] = static_cast(nodeCount[0]); + dstData->mNodeCount[1] = static_cast(nodeCount[1]); + dstData->mNodeCount[2] = static_cast(nodeCount[2]); + + // Count number of active leaf level tiles + dstData->mTileCount[0] = reduce(Range1D(0,nodeCount[1]), uint32_t(0), [&](Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<1>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active lower internal node tiles + dstData->mTileCount[1] = reduce(Range1D(0,nodeCount[2]), uint32_t(0), [&](Range1D &r, uint32_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<2>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + // Count number of active upper internal node tiles + dstData->mTileCount[2] = 0; + for (auto it = mSrcNodeAcc.root().cbeginValueOn(); it; ++it) dstData->mTileCount[2] += 1; + + // Count number of active voxels + dstData->mVoxelCount = reduce(Range1D(0, nodeCount[0]), uint64_t(0), [&](Range1D &r, uint64_t sum){ + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getValueMask().countOn(); + return sum;}, std::plus()); + + dstData->mVoxelCount += uint64_t(dstData->mTileCount[0]) << 9;// = 3 * 3 + dstData->mVoxelCount += uint64_t(dstData->mTileCount[1]) << 21;// = 3 * (3+4) + dstData->mVoxelCount += uint64_t(dstData->mTileCount[2]) << 36;// = 3 * (3+4+5) + +} // CreateNanoGrid::processTree + 
+//================================================================================================ + +template +template +void CreateNanoGrid::processGrid() +{ + auto* dstData = this->template dstGrid()->data(); + dstData->init({GridFlags::IsBreadthFirst}, mOffset.size, mSrcNodeAcc.map(), + mapToGridType(), mapToGridClass(mSrcNodeAcc.gridClass())); + dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + dstData->mData1 = this->valueCount(); + + if (!isValid(dstData->mGridType, dstData->mGridClass)) { + std::stringstream ss; + ss << "Invalid combination of GridType("<mGridType) + << ") and GridClass("<mGridClass)<<"). See NanoVDB.h for details!"; + throw std::runtime_error(ss.str()); + } + + std::memset(dstData->mGridName, '\0', GridData::MaxNameSize);//overwrite mGridName + strncpy(dstData->mGridName, mSrcNodeAcc.getName().c_str(), GridData::MaxNameSize-1); + if (mSrcNodeAcc.hasLongGridName()) dstData->setLongGridNameOn();// grid name is long so store it as blind data + + // Partially process blind meta data - they will be complete in postProcess + if (mBlindMetaData.size()>0) { + auto *metaData = this->dstMeta(0); + dstData->mBlindMetadataOffset = PtrDiff(metaData, dstData); + dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + char *blindData = PtrAdd(mBufferPtr, mOffset.blind); + for (const auto &b : mBlindMetaData) { + std::memcpy(metaData, b.metaData, sizeof(GridBlindMetaData)); + metaData->setBlindData(blindData);// sets metaData.mOffset + if (metaData->mDataClass == GridBlindDataClass::GridName) strcpy(blindData, mSrcNodeAcc.getName().c_str()); + ++metaData; + blindData += b.size; + } + mBlindMetaData.clear(); + } +} // CreateNanoGrid::processGrid + +//================================================================================================ + +template +template +inline typename disable_if::is_index>::type +CreateNanoGrid::postProcess() +{ + if constexpr(is_same::value) mCodec.reset(); + auto *dstGrid = this->template 
dstGrid(); + gridStats(*dstGrid, mStats); +#if defined(NANOVDB_USE_OPENVDB) + auto *metaData = this->dstMeta(0); + if constexpr(is_same::value || + is_same::value) { + static_assert(is_same::value, "expected DstBuildT==uint32_t"); + auto *dstData0 = this->template dstNode(0)->data(); + dstData0->mMinimum = 0; // start of prefix sum + dstData0->mMaximum = dstData0->mValues[511u]; + for (uint32_t i=1, n=mSrcNodeAcc.nodeCount(0); imMinimum = dstData0->mMinimum + dstData0->mMaximum; + dstData1->mMaximum = dstData1->mValues[511u]; + dstData0 = dstData1; + } + for (size_t i = 0, n = dstGrid->blindDataCount(); i < n; ++i, ++metaData) { + if constexpr(is_same::value) { + if (metaData->mDataClass != GridBlindDataClass::IndexArray) continue; + if (metaData->mDataType == GridType::UInt32) { + uint32_t *blindData = const_cast(metaData->template getBlindData()); + forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto j = r.begin(); j != r.end(); ++j, ++dstData) { + uint32_t* p = blindData + dstData->mMinimum; + for (uint32_t idx : mSrcNodeAcc.template node<0>(j).indices()) *p++ = idx; + } + }); + } + } else {// if constexpr(is_same::value) + if (metaData->mDataClass != GridBlindDataClass::AttributeArray) continue; + if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, reinterpret_cast(blindData)); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else if (auto *blindData = dstGrid->template getBlindData(i)) { + this->template copyPointAttribute(i, blindData); + } else { + std::cerr << "unsupported point attribute \"" << toStr(metaData->mDataType) << "\"\n"; + } + }// if + }// loop + } else { // if + (void)metaData; + } +#endif + updateChecksum(*(this->template 
dstGrid()), mChecksum); +}// CreateNanoGrid::postProcess + +//================================================================================================ + +template +template +inline typename enable_if::is_index>::type +CreateNanoGrid::postProcess(uint32_t channels) +{ + const std::string typeName = toStr(mapToGridType()); + const uint64_t valueCount = this->valueCount(); + const auto *dstGrid = this->template dstGrid(); + for (uint32_t i=0; ifindBlindData(name.c_str()); + if (j<0) throw std::runtime_error("missing " + name); + auto *metaData = this->dstMeta(j);// partially set in processGrid + metaData->mDataClass = GridBlindDataClass::ChannelArray; + metaData->mDataType = mapToGridType(); + SrcValueT *blindData = const_cast(metaData->template getBlindData()); + if (i>0) {// concurrent copy from previous channel + nanovdb::forEach(0,valueCount,1024,[&](const nanovdb::Range1D &r){ + SrcValueT *dst=blindData+r.begin(), *end=dst+r.size(), *src=dst-valueCount; + while(dst!=end) *dst++ = *src++; + }); + } else { + this->template copyValues(blindData); + } + }// loop over channels + gridStats(*(this->template dstGrid()), std::min(StatsMode::BBox, mStats)); + updateChecksum(*(this->template dstGrid()), mChecksum); +}// CreateNanoGrid::postProcess + +//================================================================================================ + +template +template +typename enable_if::is_index>::type +CreateNanoGrid::copyValues(SrcValueT *buffer) +{// copy values from the source grid into the provided buffer + assert(mBufferPtr && buffer); + using StatsT = typename FloatTraits::FloatType; + + if (this->valueCount()==0) this->template countValues(); + + auto copyNodeValues = [&](const auto &node, SrcValueT *v) { + if constexpr(BuildTraits::is_onindex) { + for (auto it = node.cbeginValueOn(); it; ++it) *v++ = *it; + } else { + for (auto it = node.cbeginValueAll(); it; ++it) *v++ = *it; + } + if (mIncludeStats) { + if constexpr(SrcNodeAccT::IS_NANOVDB) {// 
resolved at compile time + *v++ = node.minimum(); + *v++ = node.maximum(); + if constexpr(is_same::value) { + *v++ = node.average(); + *v++ = node.stdDeviation(); + } else {// eg when SrcValueT=Vec3f and StatsT=float + *v++ = SrcValueT(node.average()); + *v++ = SrcValueT(node.stdDeviation()); + } + } else {// openvdb and nanovdb::build::Grid have no stats + *v++ = buffer[0];// background + *v++ = buffer[0];// background + *v++ = buffer[0];// background + *v++ = buffer[0];// background + } + } + };// copyNodeValues + + const SrcRootT &root = mSrcNodeAcc.root(); + buffer[0] = root.background();// Value array always starts with the background value + if (mIncludeTiles) { + copyNodeValues(root, buffer + 1u); + forEach(0, mSrcNodeAcc.nodeCount(2), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<2>(i), buffer + mValIdx[2][i]); + } + }); + forEach(0, mSrcNodeAcc.nodeCount(1), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<1>(i), buffer + mValIdx[1][i]); + } + }); + } + forEach(0, mSrcNodeAcc.nodeCount(0), 4, [&](const Range1D& r) { + for (auto i = r.begin(); i!=r.end(); ++i) { + copyNodeValues(mSrcNodeAcc.template node<0>(i), buffer + mValIdx[0][i]); + } + }); +}// CreateNanoGrid::copyValues + + +//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) + +template +template +typename disable_if::value || + is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(is_same::value, "expected default template parameter"); + return 0u; +}// CreateNanoGrid::countPoints + +template +template +typename enable_if::value || + is_same::value, uint64_t>::type +CreateNanoGrid::countPoints() const +{ + static_assert(is_same::value, "expected default template parameter"); + return reduce(0, mSrcNodeAcc.nodeCount(0), 8, uint64_t(0), [&](auto &r, 
uint64_t sum) { + for (auto i=r.begin(); i!=r.end(); ++i) sum += mSrcNodeAcc.template node<0>(i).getLastValue(); + return sum;}, std::plus()); +}// CreateNanoGrid::countPoints + +template +template +typename enable_if::value>::type +CreateNanoGrid::copyPointAttribute(size_t attIdx, AttT *attPtr) +{ + static_assert(std::is_same::value, "Expected default parameter"); + using HandleT = openvdb::points::AttributeHandle; + forEach(0, mSrcNodeAcc.nodeCount(0), 16, [&](const auto& r) { + auto *dstData = this->template dstNode(r.begin())->data(); + for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { + auto& srcLeaf = mSrcNodeAcc.template node<0>(i); + HandleT handle(srcLeaf.constAttributeArray(attIdx)); + AttT *p = attPtr + dstData->mMinimum; + for (auto iter = srcLeaf.beginIndexOn(); iter; ++iter) *p++ = handle.get(*iter); + } + }); +}// CreateNanoGrid::copyPointAttribute + +#endif + +//================================================================================================ + +template +typename disable_if::is_index || BuildTraits::is_Fp, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + ChecksumMode cMode, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename enable_if::is_index, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + uint32_t channels, + bool includeStats, + bool includeTiles, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setVerbose(verbose); + return converter.template getHandle(channels, includeStats, includeTiles, buffer); +} + +//================================================================================================ + +template +typename 
enable_if::value, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + ChecksumMode cMode, + bool ditherOn, + int verbose, + const OracleT &oracle, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(oracle, buffer); +}// createNanoGrid + +//================================================================================================ + +template +typename enable_if::is_FpX, GridHandle>::type +createNanoGrid(const SrcGridT &srcGrid, + StatsMode sMode, + ChecksumMode cMode, + bool ditherOn, + int verbose, + const BufferT &buffer) +{ + CreateNanoGrid converter(srcGrid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + converter.setVerbose(verbose); + return converter.template getHandle(buffer); +}// createNanoGrid + +//================================================================================================ + +#if defined(NANOVDB_USE_OPENVDB) +template +GridHandle +openToNanoVDB(const openvdb::GridBase::Ptr& base, + StatsMode sMode, + ChecksumMode cMode, + int verbose) +{ + // We need to define these types because they are not defined in OpenVDB + using openvdb_Vec4fTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4dTree = typename openvdb::tree::Tree4::Type; + using openvdb_Vec4fGrid = openvdb::Grid; + using openvdb_Vec4dGrid = openvdb::Grid; + using openvdb_UInt32Grid = openvdb::Grid; + + if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return 
createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else if (auto grid = openvdb::GridBase::grid(base)) { + return createNanoGrid(*grid, sMode, cMode, verbose); + } else { + OPENVDB_THROW(openvdb::RuntimeError, "Unrecognized OpenVDB grid type"); + } +}// openToNanoVDB +#endif + +} // namespace nanovdb + +#endif // NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/CudaDeviceBuffer.h b/nanovdb/nanovdb/util/CudaDeviceBuffer.h deleted file mode 100644 index 542a7519cb..0000000000 --- a/nanovdb/nanovdb/util/CudaDeviceBuffer.h +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/*! - \file CudaDeviceBuffer.h - - \author Ken Museth - - \date January 8, 2020 - - \brief Implements a simple CUDA allocator! 
- - CudaDeviceBuffer - a class for simple cuda buffer allocation and management -*/ - -#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED -#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED - -#include "HostBuffer.h" // for BufferTraits - -#include // for cudaMalloc/cudaMallocManaged/cudaFree - -#if defined(DEBUG) || defined(_DEBUG) - static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) - { - if (code != cudaSuccess) { - fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } - } - static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true) - { - if (ptr == nullptr) { - fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line); - if (abort) exit(1); - } - if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) { - fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line); - if (abort) exit(1); - } - } -#else - static inline void gpuAssert(cudaError_t, const char*, int, bool = true){} - static inline void ptrAssert(void*, const char*, const char*, int, bool = true){} -#endif - -// Convenience function for checking CUDA runtime API results -// can be wrapped around any runtime API call. No-op in release builds. -#define cudaCheck(ans) \ - { \ - gpuAssert((ans), __FILE__, __LINE__); \ - } - -#define checkPtr(ptr, msg) \ - { \ - ptrAssert((ptr), (msg), __FILE__, __LINE__); \ - } - -namespace nanovdb { - -// ----------------------------> CudaDeviceBuffer <-------------------------------------- - -/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC. -/// Obviously this class is making explicit used of CUDA so replace it with your own memory -/// allocator if you are not using CUDA. -/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device -/// it is significantly slower then cached (un-pinned) memory on the host. 
-class CudaDeviceBuffer -{ - uint64_t mSize; // total number of bytes for the NanoVDB grid. - uint8_t *mCpuData, *mGpuData; // raw buffer for the NanoVDB grid. - -public: - CudaDeviceBuffer(uint64_t size = 0) - : mSize(0) - , mCpuData(nullptr) - , mGpuData(nullptr) - { - this->init(size); - } - /// @brief Disallow copy-construction - CudaDeviceBuffer(const CudaDeviceBuffer&) = delete; - /// @brief Move copy-constructor - CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept - : mSize(other.mSize) - , mCpuData(other.mCpuData) - , mGpuData(other.mGpuData) - { - other.mSize = 0; - other.mCpuData = nullptr; - other.mGpuData = nullptr; - } - /// @brief Disallow copy assignment operation - CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete; - /// @brief Move copy assignment operation - CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept - { - clear(); - mSize = other.mSize; - mCpuData = other.mCpuData; - mGpuData = other.mGpuData; - other.mSize = 0; - other.mCpuData = nullptr; - other.mGpuData = nullptr; - return *this; - } - /// @brief Destructor frees memory on both the host and device - ~CudaDeviceBuffer() { this->clear(); }; - - void init(uint64_t size); - - // @brief Retuns a pointer to the raw memory buffer managed by this allocator. - /// - /// @warning Note that the pointer can be NULL is the allocator was not initialized! - uint8_t* data() const { return mCpuData; } - uint8_t* deviceData() const { return mGpuData; } - - /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous! - /// - /// @note This will allocate memory on the GPU/device if it is not already allocated - void deviceUpload(void* stream = 0, bool sync = true) const; - - /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous! 
- void deviceDownload(void* stream = 0, bool sync = true) const; - - /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. - uint64_t size() const { return mSize; } - - /// @brief Returns true if this allocator is empty, i.e. has no allocated memory - bool empty() const { return mSize == 0; } - - /// @brief De-allocate all memory managed by this allocator and set all pointer to NULL - void clear(); - - static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr); - -}; // CudaDeviceBuffer class - -template<> -struct BufferTraits -{ - static const bool hasDeviceDual = true; -}; - -// --------------------------> Implementations below <------------------------------------ - -inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*) -{ - return CudaDeviceBuffer(size); -} - -inline void CudaDeviceBuffer::init(uint64_t size) -{ - if (size == mSize) - return; - if (mSize > 0) - this->clear(); - if (size == 0) - return; - mSize = size; - cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned - checkPtr(mCpuData, "failed to allocate host data"); -} // CudaDeviceBuffer::init - -inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const -{ - checkPtr(mCpuData, "uninitialized cpu data"); - if (mGpuData == nullptr) - cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device, always 32B aligned! 
- checkPtr(mGpuData, "uninitialized gpu data"); - cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); - if (sync) - cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); -} // CudaDeviceBuffer::gpuUpload - -inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const -{ - checkPtr(mCpuData, "uninitialized cpu data"); - checkPtr(mGpuData, "uninitialized gpu data"); - cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast(stream))); - if (sync) - cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); -} // CudaDeviceBuffer::gpuDownload - -inline void CudaDeviceBuffer::clear() -{ - if (mGpuData) - cudaCheck(cudaFree(mGpuData)); - if (mCpuData) - cudaCheck(cudaFreeHost(mCpuData)); - mCpuData = mGpuData = nullptr; - mSize = 0; -} // CudaDeviceBuffer::clear - -} // namespace nanovdb - -#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/DitherLUT.h b/nanovdb/nanovdb/util/DitherLUT.h index 885480c7fd..69c3b33031 100644 --- a/nanovdb/nanovdb/util/DitherLUT.h +++ b/nanovdb/nanovdb/util/DitherLUT.h @@ -12,7 +12,7 @@ #ifndef NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED #define NANOVDB_DITHERLUT_HAS_BEEN_INCLUDED -#include "../NanoVDB.h"// for __hosedev__, Vec3, Min, Max, Pow2, Pow3, Pow4 +#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 namespace nanovdb { diff --git a/nanovdb/nanovdb/util/GridBuilder.h b/nanovdb/nanovdb/util/GridBuilder.h index 28514a5669..6468b7e414 100644 --- a/nanovdb/nanovdb/util/GridBuilder.h +++ b/nanovdb/nanovdb/util/GridBuilder.h @@ -8,1098 +8,325 @@ \date June 26, 2020 - \brief Generates a NanoVDB grid from any volume or function. - - \note This is only intended as a simple tool to generate nanovdb grids without - any dependency on openvdb. + \brief This file defines a minimum set of tree nodes and tools that + can be used (instead of OpenVDB) to build nanovdb grids on the CPU. 
*/ -#ifndef NANOVDB_GRIDBUILDER_H_HAS_BEEN_INCLUDED -#define NANOVDB_GRIDBUILDER_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED +#define NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED -#include "GridHandle.h" -#include "GridStats.h" -#include "GridChecksum.h" -#include "Range.h" -#include "Invoke.h" -#include "ForEach.h" -#include "Reduce.h" -#include "DitherLUT.h"// for nanovdb::DitherLUT +#include #include #include #include // for stringstream #include #include // for memcpy +#include +#include +#include -namespace nanovdb { - -/// @brief Compression oracle based on absolute difference -class AbsDiff -{ - float mTolerance;// absolute error tolerance -public: - /// @note The default value of -1 means it's un-initialized! - AbsDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} - AbsDiff(const AbsDiff&) = default; - ~AbsDiff() = default; - void setTolerance(float tolerance) { mTolerance = tolerance; } - float getTolerance() const { return mTolerance; } - /// @brief Return true if the approximate value is within the accepted - /// absolute error bounds of the exact value. - /// - /// @details Required member method - bool operator()(float exact, float approx) const - { - return Abs(exact - approx) <= mTolerance; - } -};// AbsDiff - -inline std::ostream& operator<<(std::ostream& os, const AbsDiff& diff) -{ - os << "Absolute tolerance: " << diff.getTolerance(); - return os; -} - -/// @brief Compression oracle based on relative difference -class RelDiff -{ - float mTolerance;// relative error tolerance -public: - /// @note The default value of -1 means it's un-initialized! - RelDiff(float tolerance = -1.0f) : mTolerance(tolerance) {} - RelDiff(const RelDiff&) = default; - ~RelDiff() = default; - void setTolerance(float tolerance) { mTolerance = tolerance; } - float getTolerance() const { return mTolerance; } - /// @brief Return true if the approximate value is within the accepted - /// relative error bounds of the exact value. 
- /// - /// @details Required member method - bool operator()(float exact, float approx) const - { - return Abs(exact - approx)/Max(Abs(exact), Abs(approx)) <= mTolerance; - } -};// RelDiff - -inline std::ostream& operator<<(std::ostream& os, const RelDiff& diff) -{ - os << "Relative tolerance: " << diff.getTolerance(); - return os; -} - -/// @brief Allows for the construction of NanoVDB grids without any dependency -template> -class GridBuilder -{ - struct BuildLeaf; - template - struct BuildNode; - template - struct BuildRoot; - - struct Codec {float min, max; uint16_t log2, size;};// used for adaptive bit-rate quantization - - using SrcNode0 = BuildLeaf; - using SrcNode1 = BuildNode; - using SrcNode2 = BuildNode; - using SrcRootT = BuildRoot; - - using DstNode0 = NanoLeaf< BuildT>;// nanovdb::LeafNode; // leaf - using DstNode1 = NanoLower;// nanovdb::InternalNode; // lower - using DstNode2 = NanoUpper;// nanovdb::InternalNode; // upper - using DstRootT = NanoRoot< BuildT>;// nanovdb::RootNode; - using DstTreeT = NanoTree< BuildT>; - using DstGridT = NanoGrid< BuildT>; - - ValueT mDelta; // skip node if: node.max < -mDelta || node.min > mDelta - uint8_t* mBufferPtr;// pointer to the beginning of the buffer - uint64_t mBufferOffsets[9];//grid, tree, root, upper, lower, leafs, meta data, blind data, buffer size - int mVerbose; - uint64_t mBlindDataSize; - SrcRootT mRoot;// this root supports random write - std::vector mArray0; // leaf nodes - std::vector mArray1; // lower internal nodes - std::vector mArray2; // upper internal nodes - std::unique_ptr mCodec;// defines a codec per leaf node - GridClass mGridClass; - StatsMode mStats; - ChecksumMode mChecksum; - bool mDitherOn; - - // Below are private methods use to serialize nodes into NanoVDB - template< typename OracleT, typename BufferT> - GridHandle initHandle(const OracleT &oracle, const BufferT& buffer); - - template - inline typename std::enable_if::value>::type - compression(uint64_t&, OracleT) {}// no-op - 
- template - inline typename std::enable_if::value>::type - compression(uint64_t &offset, OracleT oracle); - - template - typename std::enable_if::value && - !is_same::value && - !is_same::value && - !is_same::value>::type - processLeafs(std::vector&); - - template - typename std::enable_if::value || - is_same::value || - is_same::value>::type - processLeafs(std::vector&); - - template - typename std::enable_if::value>::type - processLeafs(std::vector&); - - template - void processNodes(std::vector&); - - DstRootT* processRoot(); - - DstTreeT* processTree(); - - DstGridT* processGrid(const Map&, const std::string&); - - template - typename std::enable_if::value>::type - setFlag(const T&, const T&, FlagT& flag) const { flag &= ~FlagT(1); } // unset first bit - - template - typename std::enable_if::value>::type - setFlag(const T& min, const T& max, FlagT& flag) const; - -public: - struct ValueAccessor; - - GridBuilder(ValueT background = ValueT(), - GridClass gClass = GridClass::Unknown, - uint64_t blindDataSize = 0); - - ValueAccessor getAccessor() { return ValueAccessor(mRoot); } - - /// @brief Performs multi-threaded bottom-up signed-distance flood-filling and changes GridClass to LevelSet - /// - /// @warning Only call this method once this GridBuilder contains a valid signed distance field - void sdfToLevelSet(); - - /// @brief Performs multi-threaded bottom-up signed-distance flood-filling followed by level-set -> FOG volume - /// conversion. 
It also changes the GridClass to FogVolume - /// - /// @warning Only call this method once this GridBuilder contains a valid signed distance field - void sdfToFog(); - - void setVerbose(int mode = 1) { mVerbose = mode; } - - void enableDithering(bool on = true) { mDitherOn = on; } - - void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } - - void setChecksum(ChecksumMode mode = ChecksumMode::Default) { mChecksum = mode; } - - void setGridClass(GridClass mode = GridClass::Unknown) { mGridClass = mode; } - - /// @brief Return an instance of a GridHandle (invoking move semantics) - template - GridHandle getHandle(double voxelSize = 1.0, - const Vec3d& gridOrigin = Vec3d(0), - const std::string& name = "", - const OracleT& oracle = OracleT(), - const BufferT& buffer = BufferT()); - - /// @brief Return an instance of a GridHandle (invoking move semantics) - template - GridHandle getHandle(const Map& map, - const std::string& name = "", - const OracleT& oracle = OracleT(), - const BufferT& buffer = BufferT()); - - /// @brief Sets grids values in domain of the @a bbox to those returned by the specified @a func with the - /// expected signature [](const Coord&)->ValueT. - /// - /// @note If @a func returns a value equal to the background value (specified in the constructor) at a - /// specific voxel coordinate, then the active state of that coordinate is left off! Else the value - /// value is set and the active state is on. This is done to allow for sparse grids to be generated. - /// - /// @param func Functor used to evaluate the grid values in the @a bbox - /// @param bbox Coordinate bounding-box over which the grid values will be set. - /// @param delta Specifies a lower threshold value for rendering (optional). Typically equals the voxel size - /// for level sets and otherwise it's zero. 
- template - void operator()(const Func& func, const CoordBBox& bbox, ValueT delta = ValueT(0)); - -}; // GridBuilder - -//================================================================================================ - -template -GridBuilder:: -GridBuilder(ValueT background, GridClass gClass, uint64_t blindDataSize) - : mDelta(0) - , mVerbose(0) - , mBlindDataSize(blindDataSize) - , mRoot(background) - , mGridClass(gClass) - , mStats(StatsMode::Default) - , mChecksum(ChecksumMode::Default) - , mDitherOn(false) -{ -} - -template -template -void GridBuilder:: -operator()(const Func& func, const CoordBBox& voxelBBox, ValueT delta) -{ - static_assert(is_same::type>::value, "GridBuilder: mismatched ValueType"); - mDelta = delta; // delta = voxel size for level sets, else 0 - - using LeafT = BuildLeaf; - const CoordBBox leafBBox(voxelBBox[0] >> LeafT::TOTAL, voxelBBox[1] >> LeafT::TOTAL); - std::mutex mutex; - auto kernel = [&](const CoordBBox& b) { - LeafT* leaf = nullptr; - for (auto it = b.begin(); it; ++it) { - Coord min(*it << LeafT::TOTAL), max(min + Coord(LeafT::DIM - 1)); - const CoordBBox bbox(min.maxComponent(voxelBBox.min()), - max.minComponent(voxelBBox.max()));// crop - if (leaf == nullptr) { - leaf = new LeafT(bbox[0], mRoot.mBackground, false); - } else { - leaf->mOrigin = bbox[0] & ~LeafT::MASK; - NANOVDB_ASSERT(leaf->mValueMask.isOff()); - } - leaf->mDstOffset = 0;// no prune - for (auto ijk = bbox.begin(); ijk; ++ijk) { - const auto v = func(*ijk); - if (v == mRoot.mBackground) {// don't insert background values - continue; - } - leaf->setValue(*ijk, v); - } - if (!leaf->mValueMask.isOff()) {// has active values - if (leaf->mValueMask.isOn()) {// only active values - const auto first = leaf->getFirstValue(); - int n=1; - while (n<512) {// 8^3 = 512 - if (leaf->mValues[n++] != first) break; - } - if (n == 512) leaf->mDstOffset = 1;// prune below - } - std::lock_guard guard(mutex); - NANOVDB_ASSERT(leaf != nullptr); - mRoot.addNode(leaf); - 
NANOVDB_ASSERT(leaf == nullptr); - } - }// loop over sub-part of leafBBox - if (leaf) { - delete leaf; - } - }; // kernel - forEach(leafBBox, kernel); - - // Prune leaf and tile nodes - for (auto it2 = mRoot.mTable.begin(); it2 != mRoot.mTable.end(); ++it2) { - if (auto *upper = it2->second.child) {//upper level internal node - for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) { - auto *lower = upper->mTable[*it1].child;// lower level internal node - for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { - auto *leaf = lower->mTable[*it0].child;// leaf nodes - if (leaf->mDstOffset) { - lower->mTable[*it0].value = leaf->getFirstValue(); - lower->mChildMask.setOff(*it0); - lower->mValueMask.setOn(*it0); - delete leaf; - } - }// loop over leaf nodes - if (lower->mChildMask.isOff()) {//only tiles - const auto first = lower->getFirstValue(); - int n=1; - while (n < 4096) {// 16^3 = 4096 - if (lower->mTable[n++].value != first) break; - } - if (n == 4096) {// identical tile values so prune - upper->mTable[*it1].value = first; - upper->mChildMask.setOff(*it1); - upper->mValueMask.setOn(*it1); - delete lower; - } - } - }// loop over lower internal nodes - if (upper->mChildMask.isOff()) {//only tiles - const auto first = upper->getFirstValue(); - int n=1; - while (n < 32768) {// 32^3 = 32768 - if (upper->mTable[n++].value != first) break; - } - if (n == 32768) {// identical tile values so prune - it2->second.value = first; - it2->second.state = upper->mValueMask.isOn(); - it2->second.child = nullptr; - delete upper; - } - } - }// is child node of the root - }// loop over root table -} - -//================================================================================================ - -template -template -GridHandle GridBuilder:: -initHandle(const OracleT &oracle, const BufferT& buffer) -{ - mArray0.clear(); - mArray1.clear(); - mArray2.clear(); - mArray0.reserve(mRoot.template nodeCount()); - mArray1.reserve(mRoot.template nodeCount()); - 
mArray2.reserve(mRoot.template nodeCount()); - - uint64_t offset[3] = {0}; - for (auto it2 = mRoot.mTable.begin(); it2 != mRoot.mTable.end(); ++it2) { - if (SrcNode2 *upper = it2->second.child) { - upper->mDstOffset = offset[2]; - mArray2.emplace_back(upper); - offset[2] += DstNode2::memUsage(); - for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) { - SrcNode1 *lower = upper->mTable[*it1].child; - lower->mDstOffset = offset[1]; - mArray1.emplace_back(lower); - offset[1] += DstNode1::memUsage(); - for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { - SrcNode0 *leaf = lower->mTable[*it0].child; - leaf->mDstOffset = offset[0];// dummy if BuildT = FpN - mArray0.emplace_back(leaf); - offset[0] += sizeof(DstNode0);// dummy if BuildT = FpN - }// loop over leaf nodes - }// loop over lower internal nodes - }// is child node of the root - }// loop over root table - - this->template compression(offset[0], oracle);// no-op unless BuildT = FpN - - mBufferOffsets[0] = 0;// grid is always stored at the start of the buffer! - mBufferOffsets[1] = DstGridT::memUsage(); // tree - mBufferOffsets[2] = DstTreeT::memUsage(); // root - mBufferOffsets[3] = DstRootT::memUsage(static_cast(mRoot.mTable.size())); // upper internal nodes - mBufferOffsets[4] = offset[2]; // lower internal nodes - mBufferOffsets[5] = offset[1]; // leaf nodes - mBufferOffsets[6] = offset[0]; // blind meta data - mBufferOffsets[7] = GridBlindMetaData::memUsage(mBlindDataSize > 0 ? 
1 : 0); // blind data - mBufferOffsets[8] = mBlindDataSize;// end of buffer - - // Compute the prefixed sum - for (int i = 2; i < 9; ++i) { - mBufferOffsets[i] += mBufferOffsets[i - 1]; - } - - GridHandle handle(BufferT::create(mBufferOffsets[8], &buffer)); - mBufferPtr = handle.data(); - return handle; -} // GridBuilder::initHandle - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -GridBuilder::compression(uint64_t &offset, OracleT oracle) -{ - static_assert(is_same::value, "compression: expected BuildT == float"); - static_assert(is_same::value, "compression: expected ValueT == float"); - if (is_same::value && oracle.getTolerance() < 0.0f) {// default tolerance for level set and fog volumes - if (mGridClass == GridClass::LevelSet) { - static const float halfWidth = 3.0f; - oracle.setTolerance(0.1f * mRoot.mBackground / halfWidth);// range of ls: [-3dx; 3dx] - } else if (mGridClass == GridClass::FogVolume) { - oracle.setTolerance(0.01f);// range of FOG volumes: [0;1] - } else { - oracle.setTolerance(0.0f); - } - } - - const size_t size = mArray0.size(); - mCodec.reset(new Codec[size]); - - DitherLUT lut(mDitherOn); - auto kernel = [&](const Range1D &r) { - for (auto i=r.begin(); i!=r.end(); ++i) { - const float *data = mArray0[i]->mValues; - float min = std::numeric_limits::max(), max = -min; - for (int j=0; j<512; ++j) { - float v = data[j]; - if (vmax) max = v; - } - mCodec[i].min = min; - mCodec[i].max = max; - const float range = max - min; - uint16_t logBitWidth = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits - while (range > 0.0f && logBitWidth < 4u) { - const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u; - const float encode = mask/range; - const float decode = range/mask; - int j = 0; - do { - const float exact = data[j];// exact value - const uint32_t code = uint32_t(encode*(exact - min) + lut(j)); - const float approx = code * 
decode + min;// approximate value - j += oracle(exact, approx) ? 1 : 513; - } while(j < 512); - if (j == 512) break; - ++logBitWidth; - } - mCodec[i].log2 = logBitWidth; - mCodec[i].size = DstNode0::DataType::memUsage(1u << logBitWidth); - } - };// kernel - forEach(0, size, 4, kernel); - - if (mVerbose) { - uint32_t counters[5+1] = {0}; - ++counters[mCodec[0].log2]; - for (size_t i=1; imDstOffset = mArray0[i-1]->mDstOffset + mCodec[i-1].size; - } - std::cout << "\n" << oracle << std::endl; - std::cout << "Dithering: " << (mDitherOn ? "enabled" : "disabled") << std::endl; - float avg = 0.0f; - for (uint32_t i=0; i<=5; ++i) { - if (uint32_t n = counters[i]) { - avg += n * float(1 << i); - printf("%2i bits: %6u leaf nodes, i.e. %4.1f%%\n",1<mDstOffset = mArray0[i-1]->mDstOffset + mCodec[i-1].size; - } - } - offset = mArray0[size-1]->mDstOffset + mCodec[size-1].size; -}// GridBuilder::compression - -//================================================================================================ - -template -void GridBuilder:: - sdfToLevelSet() -{ - mArray0.clear(); - mArray1.clear(); - mArray2.clear(); - mArray0.reserve(mRoot.template nodeCount()); - mArray1.reserve(mRoot.template nodeCount()); - mArray2.reserve(mRoot.template nodeCount()); - - for (auto it2 = mRoot.mTable.begin(); it2 != mRoot.mTable.end(); ++it2) { - if (SrcNode2 *upper = it2->second.child) { - mArray2.emplace_back(upper); - for (auto it1 = upper->mChildMask.beginOn(); it1; ++it1) { - SrcNode1 *lower = upper->mTable[*it1].child; - mArray1.emplace_back(lower); - for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { - mArray0.emplace_back(lower->mTable[*it0].child); - }// loop over leaf nodes - }// loop over lower internal nodes - }// is child node of the root - }// loop over root table - - // Note that the bottom-up flood filling is essential - const ValueT outside = mRoot.mBackground; - forEach(mArray0, 8, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) - 
mArray0[i]->signedFloodFill(outside); - }); - forEach(mArray1, 1, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) - mArray1[i]->signedFloodFill(outside); - }); - forEach(mArray2, 1, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) - mArray2[i]->signedFloodFill(outside); - }); - mRoot.signedFloodFill(outside); - mGridClass = GridClass::LevelSet; -} // GridBuilder::sdfToLevelSet - -//================================================================================================ - -template -template -GridHandle GridBuilder:: - getHandle(double dx, //voxel size - const Vec3d& p0, // origin - const std::string& name, - const OracleT& oracle, - const BufferT& buffer) -{ - if (dx <= 0) { - throw std::runtime_error("GridBuilder: voxel size is zero or negative"); - } - Map map; // affine map - map.set(dx, p0, 1.0); - return this->getHandle(map, name, oracle, buffer); -} // GridBuilder::getHandle - -//================================================================================================ - -template -template< typename OracleT, typename BufferT> -GridHandle GridBuilder:: - getHandle(const Map& map, - const std::string& name, - const OracleT& oracle, - const BufferT& buffer) -{ - if (mGridClass == GridClass::LevelSet && !is_floating_point::value) { - throw std::runtime_error("Level sets are expected to be floating point types"); - } else if (mGridClass == GridClass::FogVolume && !is_floating_point::value) { - throw std::runtime_error("Fog volumes are expected to be floating point types"); - } - - auto handle = this->template initHandle(oracle, buffer);// initialize the arrays of nodes - - this->processLeafs(mArray0); - - this->processNodes(mArray1); - - this->processNodes(mArray2); - - auto *grid = this->processGrid(map, name); - - gridStats(*grid, mStats); - - updateChecksum(*grid, mChecksum); - - return handle; -} // GridBuilder::getHandle - 
-//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -GridBuilder:: - setFlag(const T& min, const T& max, FlagT& flag) const -{ - if (mDelta > 0 && (min > mDelta || max < -mDelta)) { - flag |= FlagT(1); // set first bit - } else { - flag &= ~FlagT(1); // unset first bit - } -} - -//================================================================================================ - -template -inline void GridBuilder:: - sdfToFog() -{ - this->sdfToLevelSet(); // performs signed flood fill - - const ValueT d = -mRoot.mBackground, w = 1.0f / d; - auto op = [&](ValueT& v) -> bool { - if (v > ValueT(0)) { - v = ValueT(0); - return false; - } - v = v > d ? v * w : ValueT(1); - return true; - }; - auto kernel0 = [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - SrcNode0* node = mArray0[i]; - for (uint32_t i = 0; i < SrcNode0::SIZE; ++i) - node->mValueMask.set(i, op(node->mValues[i])); - } - }; - auto kernel1 = [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - SrcNode1* node = mArray1[i]; - for (uint32_t i = 0; i < SrcNode1::SIZE; ++i) { - if (node->mChildMask.isOn(i)) { - SrcNode0* leaf = node->mTable[i].child; - if (leaf->mValueMask.isOff()) { - node->mTable[i].value = leaf->getFirstValue(); - node->mChildMask.setOff(i); - delete leaf; - } - } else { - node->mValueMask.set(i, op(node->mTable[i].value)); - } - } - } - }; - auto kernel2 = [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - SrcNode2* node = mArray2[i]; - for (uint32_t i = 0; i < SrcNode2::SIZE; ++i) { - if (node->mChildMask.isOn(i)) { - SrcNode1* child = node->mTable[i].child; - if (child->mChildMask.isOff() && child->mValueMask.isOff()) { - node->mTable[i].value = child->getFirstValue(); - node->mChildMask.setOff(i); - delete child; - } - } else { - node->mValueMask.set(i, op(node->mTable[i].value)); - } - } - } - }; - 
forEach(mArray0, 8, kernel0); - forEach(mArray1, 1, kernel1); - forEach(mArray2, 1, kernel2); - - for (auto it = mRoot.mTable.begin(); it != mRoot.mTable.end(); ++it) { - SrcNode2* child = it->second.child; - if (child == nullptr) { - it->second.state = op(it->second.value); - } else if (child->mChildMask.isOff() && child->mValueMask.isOff()) { - it->second.value = child->getFirstValue(); - it->second.state = false; - it->second.child = nullptr; - delete child; - } - } - mGridClass = GridClass::FogVolume; -} // GridBuilder::sdfToFog - -//================================================================================================ - -template -template -inline typename std::enable_if::value && - !is_same::value && - !is_same::value && - !is_same::value>::type -GridBuilder:: - processLeafs(std::vector& srcLeafs) -{ - static_assert(!is_same::value, "Does not yet support bool leafs"); - static_assert(!is_same::value, "Does not yet support mask leafs"); - auto kernel = [&](const Range1D& r) { - auto *ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *srcLeaf = srcLeafs[i]; - auto *dstLeaf = PtrAdd(ptr, srcLeaf->mDstOffset); - auto *data = dstLeaf->data(); - if (DstNode0::DataType::padding()>0u) { - std::memset(data, 0, DstNode0::DataType::memUsage()); - } else { - data->mBBoxDif[0] = 0u; - data->mBBoxDif[1] = 0u; - data->mBBoxDif[2] = 0u; - data->mFlags = 0u;// enable rendering, no bbox - data->mMinimum = data->mMaximum = ValueT(); - data->mAverage = data->mStdDevi = 0; - } - srcLeaf->mDstNode = dstLeaf; - data->mBBoxMin = srcLeaf->mOrigin; // copy origin of node - data->mValueMask = srcLeaf->mValueMask; // copy value mask - const ValueT* src = srcLeaf->mValues; - for (ValueT *dst = data->mValues, *end = dst + SrcNode0::SIZE; dst != end; dst += 4, src += 4) { - dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. 
loop-unrolling - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - } - }; - forEach(srcLeafs, 8, kernel); -} // GridBuilder::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::value || - is_same::value || - is_same::value>::type -GridBuilder:: - processLeafs(std::vector& srcLeafs) -{ - static_assert(is_same::value, "Expected ValueT == float"); - using ArrayT = typename DstNode0::DataType::ArrayType; - using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double - static constexpr FloatT UNITS = FloatT((1 << DstNode0::DataType::bitWidth()) - 1);// # of unique non-zero values - DitherLUT lut(mDitherOn); - - auto kernel = [&](const Range1D& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *srcLeaf = srcLeafs[i]; - auto *dstLeaf = PtrAdd(ptr, srcLeaf->mDstOffset); - srcLeaf->mDstNode = dstLeaf; - auto *data = dstLeaf->data(); - if (DstNode0::DataType::padding()>0u) { - std::memset(data, 0, DstNode0::DataType::memUsage()); - } else { - data->mFlags = data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mDev = data->mAvg = data->mMax = data->mMin = 0u; - } - data->mBBoxMin = srcLeaf->mOrigin; // copy origin of node - data->mValueMask = srcLeaf->mValueMask; // copy value mask - const float* src = srcLeaf->mValues; - // compute extrema values - float min = std::numeric_limits::max(), max = -min; - for (int i=0; i<512; ++i) { - const float v = src[i]; - if (v < min) min = v; - if (v > max) max = v; - } - data->init(min, max, DstNode0::DataType::bitWidth()); - // perform quantization relative to the values in the current leaf node - const FloatT encode = UNITS/(max-min); - auto *code = reinterpret_cast(data->mCode); - int offset = 0; - if (is_same::value) {// resolved at compile-time - for (int j=0; j<128; ++j) { - auto 
tmp = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)) << 4 | tmp; - tmp = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)) << 4 | tmp; - } - } else { - for (int j=0; j<128; ++j) { - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - } - } - } - }; - forEach(srcLeafs, 8, kernel); -} // GridBuilder::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -GridBuilder:: - processLeafs(std::vector& srcLeafs) -{ - static_assert(is_same::value, "Expected ValueT == float"); - - DitherLUT lut(mDitherOn); - auto kernel = [&](const Range1D& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *srcLeaf = srcLeafs[i]; - auto *dstLeaf = PtrAdd(ptr, srcLeaf->mDstOffset); - auto *data = dstLeaf->data(); - data->mBBoxMin = srcLeaf->mOrigin; // copy origin of node - data->mBBoxDif[0] = 0u; - data->mBBoxDif[1] = 0u; - data->mBBoxDif[2] = 0u; - srcLeaf->mDstNode = dstLeaf; - const uint8_t logBitWidth = uint8_t(mCodec[i].log2); - data->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag - data->mValueMask = srcLeaf->mValueMask; // copy value mask - const float* src = srcLeaf->mValues; - const float min = mCodec[i].min, max = mCodec[i].max; - data->init(min, max, uint8_t(1) << logBitWidth); - // perform quantization relative to the values in the current leaf node - int offset = 0; - switch (logBitWidth) { - case 0u: {// 1 bit - auto *dst = reinterpret_cast(data+1); - const float encode = 1.0f/(max - min); - for (int j=0; j<64; ++j) { - uint8_t a = 0; - for (int k=0; k<8; ++k) { - a |= 
uint8_t(encode * (*src++ - min) + lut(offset++)) << k; - } - *dst++ = a; - } - } - break; - case 1u: {// 2 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 3.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = uint8_t(encode * (*src++ - min) + lut(offset++)); - a |= uint8_t(encode * (*src++ - min) + lut(offset++)) << 2; - a |= uint8_t(encode * (*src++ - min) + lut(offset++)) << 4; - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 6 | a; - } - } - break; - case 2u: {// 4 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 15.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 4 | a; - a = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 4 | a; - } - } - break; - case 3u: {// 8 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 255.0f/(max - min); - for (int j=0; j<128; ++j) { - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - } - } - break; - default: {// 16 bits - auto *dst = reinterpret_cast(data+1); - const double encode = 65535.0/(max - min);// note that double is required! 
- for (int j=0; j<128; ++j) { - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - } - } - }// end switch - } - };// kernel - forEach(srcLeafs, 8, kernel); -} // GridBuilder::processLeafs - -//================================================================================================ - -template -template -void GridBuilder:: - processNodes(std::vector& srcNodes) -{ - using DstNodeT = typename SrcNodeT::NanoNodeT; - static_assert(DstNodeT::LEVEL == 1 || DstNodeT::LEVEL == 2, "Expected internal node"); - auto kernel = [&](const Range1D& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5 - DstNodeT::LEVEL];// 3 or 4 - for (auto i = r.begin(); i != r.end(); ++i) { - SrcNodeT *srcNode = srcNodes[i]; - DstNodeT *dstNode = PtrAdd(ptr, srcNode->mDstOffset); - auto *data = dstNode->data(); - if (DstNodeT::DataType::padding()>0u) std::memset(data, 0, DstNodeT::memUsage()); - srcNode->mDstNode = dstNode; - data->mBBox[0] = srcNode->mOrigin; // copy origin of node - data->mValueMask = srcNode->mValueMask; // copy value mask - data->mChildMask = srcNode->mChildMask; // copy child mask - for (uint32_t j = 0; j != SrcNodeT::SIZE; ++j) { - if (data->mChildMask.isOn(j)) { - data->setChild(j, srcNode->mTable[j].child->mDstNode); - } else - data->setValue(j, srcNode->mTable[j].value); - } - } - }; - forEach(srcNodes, 4, kernel); -} // GridBuilder::processNodes - -//================================================================================================ - -template -NanoRoot* GridBuilder::processRoot() -{ - auto *dstRoot = reinterpret_cast(mBufferPtr + mBufferOffsets[2]); - auto *data = dstRoot->data(); - if (data->padding()>0) std::memset(data, 0, DstRootT::memUsage(uint32_t(mRoot.mTable.size()))); - data->mTableSize = uint32_t(mRoot.mTable.size()); - data->mMinimum = 
data->mMaximum = data->mBackground = mRoot.mBackground; - data->mBBox = CoordBBox(); // // set to an empty bounding box - - uint32_t tileID = 0; - for (auto iter = mRoot.mTable.begin(); iter != mRoot.mTable.end(); ++iter) { - auto *dstTile = data->tile(tileID++); - if (auto* srcChild = iter->second.child) { - dstTile->setChild(srcChild->mOrigin, srcChild->mDstNode, data); - } else { - dstTile->setValue(iter->first, iter->second.state, iter->second.value); - } - } - return dstRoot; -} // GridBuilder::processRoot - -//================================================================================================ +#include +#include "Range.h" +#include "ForEach.h" -template -NanoTree* GridBuilder::processTree() -{ - auto *dstTree = reinterpret_cast(mBufferPtr + mBufferOffsets[1]); - auto *data = dstTree->data(); - data->setRoot( this->processRoot() ); +namespace nanovdb { - DstNode2 *node2 = mArray2.empty() ? nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[3]); - data->setFirstNode(node2); +namespace build { - DstNode1 *node1 = mArray1.empty() ? nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[4]); - data->setFirstNode(node1); +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- - DstNode0 *node0 = mArray0.empty() ? 
nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[5]); - data->setFirstNode(node0); +template struct GetValue; +template struct SetValue; +template struct TouchLeaf; +template struct GetState; +template struct ProbeValue; - data->mNodeCount[0] = static_cast(mArray0.size()); - data->mNodeCount[1] = static_cast(mArray1.size()); - data->mNodeCount[2] = static_cast(mArray2.size()); +// ----------------------------> RootNode <-------------------------------------- - // Count number of active leaf level tiles - data->mTileCount[0] = reduce(mArray1, uint32_t(0), [&](Range1D &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray1[i]->mValueMask.countOn(); - return sum;}, std::plus()); +template +struct RootNode +{ + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v, bool s) : child(nullptr), value(v), state(s) {} + bool isChild() const { return child!=nullptr; } + bool isValue() const { return child==nullptr; } + bool isActive() const { return child==nullptr && state; } + ChildT* child; + ValueType value; + bool state; + }; + using MapT = std::map; + MapT mTable; + ValueType mBackground; - // Count number of active lower internal node tiles - data->mTileCount[1] = reduce(mArray2, uint32_t(0), [&](Range1D &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray2[i]->mValueMask.countOn(); - return sum;}, std::plus()); + Tile* probeTile(const Coord &ijk) { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? 
nullptr : &(iter->second); + } - // Count number of active upper internal node tiles - uint32_t sum = 0; - for (auto &tile : mRoot.mTable) { - if (tile.second.child==nullptr && tile.second.state) ++sum; + const Tile* probeTile(const Coord &ijk) const { + auto iter = mTable.find(CoordToKey(ijk)); + return iter == mTable.end() ? nullptr : &(iter->second); } - data->mTileCount[2] = sum; - // Count number of active voxels - data->mVoxelCount = reduce(mArray0, uint64_t(0), [&](Range1D &r, uint64_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray0[i]->mValueMask.countOn(); - return sum;}, std::plus()); + class ChildIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ChildIterator() : mParent(nullptr), mIter() {} + ChildIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child==nullptr) ++mIter; + } + ChildIterator& operator=(const ChildIterator&) = default; + ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mIter->second.child;} + ChildT* operator->() const {NANOVDB_ASSERT(*this); return mIter->second.child;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ChildIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child==nullptr) ++mIter; + return *this; + } + ChildIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ChildIterator - data->mVoxelCount += data->mTileCount[0]*DstNode0::NUM_VALUES; - data->mVoxelCount += data->mTileCount[1]*DstNode1::NUM_VALUES; - data->mVoxelCount += data->mTileCount[2]*DstNode2::NUM_VALUES; + 
ChildIterator cbeginChild() const {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb - return dstTree; -} // GridBuilder::processTree + class ValueIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueIterator() : mParent(nullptr), mIter() {} + ValueIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + } + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + bool isActive() const {NANOVDB_ASSERT(*this); return mIter->second.state;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && mIter->second.child!=nullptr) ++mIter; + return *this;; + } + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueIterator -//================================================================================================ + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} -template -NanoGrid* GridBuilder:: -processGrid(const Map& map, - const std::string& name) -{ - auto *dstGrid = reinterpret_cast(mBufferPtr + mBufferOffsets[0]); - this->processTree(); - auto* data = dstGrid->data(); - data->mMagic = NANOVDB_MAGIC_NUMBER; - data->mChecksum = 0u; - data->mVersion = Version(); - data->mFlags = static_cast(GridFlags::IsBreadthFirst); - data->mGridIndex = 0; - 
data->mGridCount = 1; - data->mGridSize = mBufferOffsets[8]; - data->mWorldBBox = BBox(); - data->mBlindMetadataOffset = 0; - data->mBlindMetadataCount = 0; - data->mGridClass = mGridClass; - data->mGridType = mapToGridType(); - data->mData0 = 0u; - data->mData1 = 0u; - data->mData2 = 0u; - - if (!isValid(data->mGridType, data->mGridClass)) { - std::stringstream ss; - ss << "Invalid combination of GridType("<mGridType) - << ") and GridClass("<mGridClass)<<"). See NanoVDB.h for details!"; - throw std::runtime_error(ss.str()); - } - - std::memset(data->mGridName, '\0', GridData::MaxNameSize);//overwrite mGridName - strncpy(data->mGridName, name.c_str(), GridData::MaxNameSize-1); - if (name.length() >= GridData::MaxNameSize) {// currently we don't support long grid names - std::stringstream ss; - ss << "Grid name \"" << name << "\" is more then " << GridData::MaxNameSize << " characters"; - throw std::runtime_error(ss.str()); - } - - data->mVoxelSize = map.applyMap(Vec3d(1)) - map.applyMap(Vec3d(0)); - data->mMap = map; - - if (mBlindDataSize>0) { - auto *metaData = reinterpret_cast(mBufferPtr + mBufferOffsets[6]); - data->mBlindMetadataOffset = PtrDiff(metaData, dstGrid); - data->mBlindMetadataCount = 1u;// we currently support only 1 set of blind data - auto *blindData = reinterpret_cast(mBufferPtr + mBufferOffsets[7]); - metaData->setBlindData(blindData); - } - - return dstGrid; -} // GridBuilder::processGrid + class ValueOnIterator + { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + ValueOnIterator() : mParent(nullptr), mIter() {} + ValueOnIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + while (mIter!=parent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mIter->second.value;} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return 
mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + ValueOnIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + while (mIter!=mParent->mTable.end() && (mIter->second.child!=nullptr || !mIter->second.state)) ++mIter; + return *this;; + } + ValueOnIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class ValueOnIterator -//================================================================================================ + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} -template -template -struct GridBuilder::BuildRoot -{ - using ValueType = typename ChildT::ValueType; - using ChildType = ChildT; - static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf - struct Tile + class TileIterator { - Tile(ChildT* c = nullptr) - : child(c) - { + const RootNode *mParent; + typename MapT::const_iterator mIter; + public: + TileIterator() : mParent(nullptr), mIter() {} + TileIterator(const RootNode *parent) : mParent(parent), mIter(parent->mTable.begin()) { + NANOVDB_ASSERT(mParent); } - Tile(const ValueT& v, bool s) - : child(nullptr) - , value(v) - , state(s) - { + TileIterator& operator=(const TileIterator&) = default; + const Tile& operator*() const {NANOVDB_ASSERT(*this); return mIter->second;} + const Tile* operator->() const {NANOVDB_ASSERT(*this); return &(mIter->second);} + Coord getOrigin() const { NANOVDB_ASSERT(*this); return mIter->first;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mIter->first;} + operator bool() const {return mParent && mIter!=mParent->mTable.end();} + const ChildT* probeChild(ValueType &value) { + NANOVDB_ASSERT(*this); + const ChildT *child = 
mIter->second.child; + if (child==nullptr) value = mIter->second.value; + return child; } - ChildT* child; - ValueT value; - bool state; - }; - using MapT = std::map; - MapT mTable; - ValueT mBackground; + bool isValueOn() const {return mIter->second.child==nullptr && mIter->second.state;} + TileIterator& operator++() { + NANOVDB_ASSERT(mParent); + ++mIter; + return *this; + } + TileIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + uint32_t pos() const { + NANOVDB_ASSERT(mParent); + return uint32_t(std::distance(mParent->mTable.begin(), mIter)); + } + }; // Member class TileIterator - BuildRoot(const ValueT& background) - : mBackground(background) + TileIterator beginTile() {return TileIterator(this);} + TileIterator cbeginChildAll() const {return TileIterator(this);} + + //class DenseIterator : public TileIterator + + RootNode(const ValueType& background) : mBackground(background) {} + RootNode(const RootNode&) = delete; // disallow copy-construction + RootNode(RootNode&&) = default; // allow move construction + RootNode& operator=(const RootNode&) = delete; // disallow copy assignment + RootNode& operator=(RootNode&&) = default; // allow move assignment + + ~RootNode() { this->clear(); } + + uint32_t tileCount() const { return uint32_t(mTable.size()); } + uint32_t getTableSize() const { return uint32_t(mTable.size()); }// match openvdb + const ValueType& background() const {return mBackground;} + + void nodeCount(std::array &count) const { + for (auto it = this->cbeginChild(); it; ++it) { + count[ChildT::LEVEL] += 1; + it->nodeCount(count); + } } - BuildRoot(const BuildRoot&) = delete; // disallow copy-construction - BuildRoot(BuildRoot&&) = default; // allow move construction - BuildRoot& operator=(const BuildRoot&) = delete; // disallow copy assignment - BuildRoot& operator=(BuildRoot&&) = default; // allow move assignment - - ~BuildRoot() { this->clear(); } bool empty() const { return mTable.empty(); } void clear() { - for (auto 
iter = mTable.begin(); iter != mTable.end(); ++iter) - delete iter->second.child; + for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) delete iter->second.child; mTable.clear(); } static Coord CoordToKey(const Coord& ijk) { return ijk & ~ChildT::MASK; } - template - bool isActiveAndCache(const Coord& ijk, AccT& acc) const +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + template + auto get(const Coord& ijk, ArgsT&&... args) const { - auto iter = mTable.find(CoordToKey(ijk)); - if (iter == mTable.end()) - return false; - if (iter->second.child) { - acc.insert(ijk, iter->second.child); - return iter->second.child->isActiveAndCache(ijk, acc); + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) return child->template get(ijk, args...); + return OpT::get(*tile, args...); } - return iter->second.state; + return OpT::get(*this, args...); + } + template + auto set(const Coord& ijk, ArgsT&&... args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + if (const Tile *tile = this->probeTile(ijk)) { + if (auto *child = tile->child) { + acc.insert(ijk, child); + return child->template get(ijk, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); } - const ValueT& getValue(const Coord& ijk) const + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... 
args) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + ValueType getValue(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + ValueType operator()(const Coord& ijk) const {return this->template get>(ijk);} + ValueType operator()(int i, int j, int k) const {return this->template get>(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType& value) {this->template set>(ijk, value);} + bool probeValue(const Coord& ijk, ValueType& value) const {return this->template get>(ijk, value);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const { +#if 1 + if (auto *tile = this->probeTile(ijk)) return tile->child ? 
tile->child->getValue(ijk) : tile->value; + return mBackground; +#else auto iter = mTable.find(CoordToKey(ijk)); if (iter == mTable.end()) { return mBackground; @@ -1108,10 +335,43 @@ struct GridBuilder::BuildRoot } else { return iter->second.value; } +#endif + } + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + + void setValue(const Coord& ijk, const ValueType& value) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + child->setValue(ijk, value); + } + + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + auto iter = mTable.find(CoordToKey(ijk)); + if (iter == mTable.end()) + return false; + if (iter->second.child) { + acc.insert(ijk, iter->second.child); + return iter->second.child->isActiveAndCache(ijk, acc); + } + return iter->second.state; } template - const ValueT& getValueAndCache(const Coord& ijk, AccT& acc) const + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const { auto iter = mTable.find(CoordToKey(ijk)); if (iter == mTable.end()) @@ -1124,11 +384,11 @@ struct GridBuilder::BuildRoot } template - void setValueAndCache(const Coord& ijk, const ValueT& value, AccT& acc) + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& acc) { - ChildT* child = nullptr; + ChildT* child = nullptr; const Coord key = CoordToKey(ijk); - auto iter = mTable.find(key); + auto iter = mTable.find(key); if (iter == mTable.end()) { child = new ChildT(ijk, mBackground, false); mTable[key] = Tile(child); @@ -1142,17 +402,54 @@ struct GridBuilder::BuildRoot acc.insert(ijk, child); child->setValueAndCache(ijk, value, acc); } + 
template + void setValueOnAndCache(const Coord& ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); + } + template + void touchLeafAndCache(const Coord &ijk, AccT& acc) + { + ChildT* child = nullptr; + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + acc.insert(ijk, child); + child->touchLeafAndCache(ijk, acc); + } +#endif// NANOVDB_NEW_ACCESSOR_METHODS template uint32_t nodeCount() const { - static_assert(is_same::value, "Root::getNodes: Invalid type"); + static_assert(is_same::value, "Root::getNodes: Invalid type"); static_assert(NodeT::LEVEL < LEVEL, "Root::getNodes: LEVEL error"); uint32_t sum = 0; for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { - if (iter->second.child == nullptr) - continue; // skip tiles - if (is_same::value) { //resolved at compile-time + if (iter->second.child == nullptr) continue; // skip tiles + if constexpr(is_same::value) { //resolved at compile-time ++sum; } else { sum += iter->second.child->template nodeCount(); @@ -1164,12 +461,12 @@ struct GridBuilder::BuildRoot template void getNodes(std::vector& array) { - static_assert(is_same::value, "Root::getNodes: Invalid type"); + static_assert(is_same::value, "Root::getNodes: Invalid type"); static_assert(NodeT::LEVEL < 
LEVEL, "Root::getNodes: LEVEL error"); for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { if (iter->second.child == nullptr) continue; - if (is_same::value) { //resolved at compile-time + if constexpr(is_same::value) { //resolved at compile-time array.push_back(reinterpret_cast(iter->second.child)); } else { iter->second.child->getNodes(array); @@ -1181,7 +478,7 @@ struct GridBuilder::BuildRoot { NANOVDB_ASSERT(child); const Coord key = CoordToKey(child->mOrigin); - auto iter = mTable.find(key); + auto iter = mTable.find(key); if (iter != mTable.end() && iter->second.child != nullptr) { // existing child node delete iter->second.child; iter->second.child = child; @@ -1191,10 +488,50 @@ struct GridBuilder::BuildRoot child = nullptr; } + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1, 2 or 3. 
+ /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const Coord key = CoordToKey(ijk); + auto iter = mTable.find(key); + if constexpr(level == LEVEL) { + if (iter == mTable.end()) { + mTable[key] = Tile(value, state); + } else if (iter->second.child == nullptr) { + iter->second.value = value; + iter->second.state = state; + } else { + delete iter->second.child; + iter->second.child = nullptr; + iter->second.value = value; + iter->second.state = state; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (iter == mTable.end()) { + child = new ChildT(ijk, mBackground, false); + mTable[key] = Tile(child); + } else if (iter->second.child != nullptr) { + child = iter->second.child; + } else { + child = new ChildT(ijk, iter->second.value, iter->second.state); + iter->second.child = child; + } + child->template addTile(ijk, value, state); + } + } + template void addNode(NodeT*& node) { - if (is_same::value) { //resolved at compile-time + if constexpr(is_same::value) { //resolved at compile-time this->addChild(reinterpret_cast(node)); } else { ChildT* child = nullptr; @@ -1213,23 +550,33 @@ struct GridBuilder::BuildRoot } } + void merge(RootNode &other) + { + for (auto iter1 = other.mTable.begin(); iter1 != other.mTable.end(); ++iter1) { + if (iter1->second.child == nullptr) continue;// ignore input tiles + auto iter2 = mTable.find(iter1->first); + if (iter2 == mTable.end() || iter2->second.child == nullptr) { + mTable[iter1->first] = Tile(iter1->second.child); + iter1->second.child = nullptr; + } else { + iter2->second.child->merge(*iter1->second.child); + } + } + other.clear(); + } + template typename std::enable_if::value>::type signedFloodFill(T outside); - template - typename 
std::enable_if::value>::type - signedFloodFill(T) {} // no-op for none floating point values -}; // GridBuilder::BuildRoot +}; // build::RootNode //================================================================================================ -template template template inline typename std::enable_if::value>::type -GridBuilder::BuildRoot:: - signedFloodFill(T outside) +RootNode::signedFloodFill(T outside) { std::map nodeKeys; for (auto iter = mTable.begin(); iter != mTable.end(); ++iter) { @@ -1247,27 +594,26 @@ GridBuilder::BuildRoot:: Coord d = b->first - a->first; // delta of neighboring coordinates if (d[0] != 0 || d[1] != 0 || d[2] == int(ChildT::DIM)) continue; // not same z-scanline or neighbors - const ValueT fill[] = {a->second->getLastValue(), b->second->getFirstValue()}; + const ValueType fill[] = {a->second->getLastValue(), b->second->getFirstValue()}; if (!(fill[0] < 0) || !(fill[1] < 0)) continue; // scanline isn't inside Coord c = a->first + Coord(0u, 0u, ChildT::DIM); for (; c[2] != b->first[2]; c[2] += ChildT::DIM) { - const Coord key = SrcRootT::CoordToKey(c); - mTable[key] = typename SrcRootT::Tile(-outside, false); // inactive tile + const Coord key = RootNode::CoordToKey(c); + mTable[key] = typename RootNode::Tile(-outside, false); // inactive tile } } -} // Root::signedFloodFill +} // build::RootNode::signedFloodFill -//================================================================================================ +// ----------------------------> InternalNode <-------------------------------------- -template template -struct GridBuilder:: - BuildNode +struct InternalNode { - using ValueType = ValueT; - using BuildType = BuildT; - using ChildType = ChildT; + using ValueType = typename ChildT::ValueType; + using BuildType = typename ChildT::BuildType; + using ChildNodeType = ChildT; + using LeafNodeType = typename ChildT::LeafNodeType; static constexpr uint32_t LOG2DIM = ChildT::LOG2DIM + 1; static constexpr uint32_t TOTAL = LOG2DIM + 
ChildT::TOTAL; //dimension in index space static constexpr uint32_t DIM = 1u << TOTAL; @@ -1276,18 +622,16 @@ struct GridBuilder:: static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using MaskT = Mask; - using NanoNodeT = typename NanoNode::Type; - - struct Tile - { - Tile(ChildT* c = nullptr) - : child(c) - { - } - union - { - ChildT* child; - ValueT value; + template + using MaskIterT = typename MaskT::template Iterator; + using NanoNodeT = typename NanoNode::Type; + + struct Tile { + Tile(ChildT* c = nullptr) : child(c) {} + Tile(const ValueType& v) : value(v) {} + union{ + ChildT* child; + ValueType value; }; }; Coord mOrigin; @@ -1300,26 +644,113 @@ struct GridBuilder:: uint64_t mDstOffset; }; - BuildNode(const Coord& origin, const ValueT& value, bool state) + /// @brief Visits child nodes of this node only + class ChildIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ChildIterator() : BaseT(), mParent(nullptr) {} + ChildIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOn()), mParent(parent) {} + ChildIterator& operator=(const ChildIterator&) = default; + const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->mTable[BaseT::pos()].child;} + const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].child;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return (*this)->origin();} + }; // Member class ChildIterator + + ChildIterator beginChild() {return ChildIterator(this);} + ChildIterator cbeginChildOn() const {return ChildIterator(this);}// match openvdb + + /// @brief Visits all tile values in this node, i.e. 
both inactive and active tiles + class ValueIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueIterator() : BaseT(), mParent(nullptr) {} + ValueIterator(const InternalNode* parent) : BaseT(parent->mChildMask.beginOff()), mParent(parent) {} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(BaseT::pos());} + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + /// @brief Visits active tile values of this node only + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const InternalNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mTable[BaseT::pos()].value;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public MaskT::DenseIterator + { + using BaseT = typename MaskT::DenseIterator; + const InternalNode *mParent; + public: + DenseIterator() : BaseT(), mParent(nullptr) {} + DenseIterator(const InternalNode* parent) : BaseT(0), mParent(parent) {} + DenseIterator& operator=(const DenseIterator&) = default; + ChildT* 
probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + ChildT *child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->mTable[BaseT::pos()].child; + } else { + value = mParent->mTable[BaseT::pos()].value; + } + return child; + } + Coord getCoord() const { NANOVDB_ASSERT(mParent && bool(*this)); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class DenseIterator + + DenseIterator beginDense() {return DenseIterator(this);} + DenseIterator cbeginChildAll() const {return DenseIterator(this);}// matches openvdb + + InternalNode(const Coord& origin, const ValueType& value, bool state) : mOrigin(origin & ~MASK) , mValueMask(state) , mChildMask() , mDstOffset(0) { - for (uint32_t i = 0; i < SIZE; ++i) { - mTable[i].value = value; - } + for (uint32_t i = 0; i < SIZE; ++i) mTable[i].value = value; } - BuildNode(const BuildNode&) = delete; // disallow copy-construction - BuildNode(BuildNode&&) = delete; // disallow move construction - BuildNode& operator=(const BuildNode&) = delete; // disallow copy assignment - BuildNode& operator=(BuildNode&&) = delete; // disallow move assignment - ~BuildNode() + InternalNode(const InternalNode&) = delete; // disallow copy-construction + InternalNode(InternalNode&&) = delete; // disallow move construction + InternalNode& operator=(const InternalNode&) = delete; // disallow copy assignment + InternalNode& operator=(InternalNode&&) = delete; // disallow move assignment + ~InternalNode() { for (auto iter = mChildMask.beginOn(); iter; ++iter) { delete mTable[*iter].child; } } + const MaskT& getValueMask() const {return mValueMask;} + const MaskT& valueMask() const {return mValueMask;} + const MaskT& getChildMask() const {return mChildMask;} + const MaskT& childMask() const {return mChildMask;} + const Coord& origin() const {return mOrigin;} + + void nodeCount(std::array &count) const + { + count[ChildT::LEVEL] += mChildMask.countOn(); + if constexpr(ChildT::LEVEL>0) { + 
for (auto it = const_cast(this)->beginChild(); it; ++it) it->nodeCount(count); + } + } static uint32_t CoordToOffset(const Coord& ijk) { @@ -1343,26 +774,78 @@ struct GridBuilder:: Coord offsetToGlobalCoord(uint32_t n) const { - Coord ijk = BuildNode::OffsetToLocalCoord(n); + Coord ijk = InternalNode::OffsetToLocalCoord(n); this->localToGlobalCoord(ijk); return ijk; } - template - bool isActiveAndCache(const Coord& ijk, AccT& acc) const + ValueType getFirstValue() const { return mChildMask.isOn(0) ? mTable[0].child->getFirstValue() : mTable[0].value; } + ValueType getLastValue() const { return mChildMask.isOn(SIZE - 1) ? mTable[SIZE - 1].child->getLastValue() : mTable[SIZE - 1].value; } + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) return mTable[n].child->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... args) { const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; if (mChildMask.isOn(n)) { - acc.insert(ijk, const_cast(mTable[n].child)); - return mTable[n].child->isActiveAndCache(ijk, acc); + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + return child->template set(ijk, args...); + } + + template + auto getAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOff(n)) return OpT::get(*this, n, args...); + ChildT* child = mTable[n].child; + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template get(ijk, args...); + } else { + return child->template getAndCache(ijk, acc, args...); } - return mValueMask.isOn(n); } - ValueT getFirstValue() const { return mChildMask.isOn(0) ? 
mTable[0].child->getFirstValue() : mTable[0].value; } - ValueT getLastValue() const { return mChildMask.isOn(SIZE - 1) ? mTable[SIZE - 1].child->getLastValue() : mTable[SIZE - 1].value; } + template + auto setAndCache(const Coord& ijk, const AccT& acc, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + NANOVDB_ASSERT(child); + acc.insert(ijk, child); + if constexpr(ChildT::LEVEL == 0) { + return child->template set(ijk, args...); + } else { + return child->template setAndCache(ijk, acc, args...); + } + } - const ValueT& getValue(const Coord& ijk) const +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafNodeType& setValue(const Coord& ijk, const ValueType& value){return this->template set>(ijk, value);} +#else + ValueType getValue(const Coord& ijk) const { const uint32_t n = CoordToOffset(ijk); if (mChildMask.isOn(n)) { @@ -1370,19 +853,49 @@ struct GridBuilder:: } return mTable[n].value; } + void setValue(const Coord& ijk, const ValueType& value) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->setValue(ijk, value); + } template - const ValueT& getValueAndCache(const Coord& ijk, AccT& acc) const + ValueType getValueAndCache(const Coord& ijk, AccT& acc) const { const uint32_t n = CoordToOffset(ijk); if (mChildMask.isOn(n)) { acc.insert(ijk, const_cast(mTable[n].child)); return mTable[n].child->getValueAndCache(ijk, acc); } - return mTable[n].value; + return mTable[n].value; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, AccT& 
acc) + { + const uint32_t n = CoordToOffset(ijk); + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, mTable[n].value, mValueMask.isOn(n)); + mTable[n].child = child; + mChildMask.setOn(n); + } + acc.insert(ijk, child); + child->setValueAndCache(ijk, value, acc); } - void setValue(const Coord& ijk, const ValueT& value) + template + void setValueOnAndCache(const Coord& ijk, AccT& acc) { const uint32_t n = CoordToOffset(ijk); ChildT* child = nullptr; @@ -1393,14 +906,15 @@ struct GridBuilder:: mTable[n].child = child; mChildMask.setOn(n); } - child->setValue(ijk, value); + acc.insert(ijk, child); + child->setValueOnAndCache(ijk, acc); } template - void setValueAndCache(const Coord& ijk, const ValueT& value, AccT& acc) + void touchLeafAndCache(const Coord &ijk, AccT& acc) { const uint32_t n = CoordToOffset(ijk); - ChildT* child = nullptr; + ChildT* child = nullptr; if (mChildMask.isOn(n)) { child = mTable[n].child; } else { @@ -1409,18 +923,29 @@ struct GridBuilder:: mChildMask.setOn(n); } acc.insert(ijk, child); - child->setValueAndCache(ijk, value, acc); + if constexpr(LEVEL>1) child->touchLeafAndCache(ijk, acc); + } + template + bool isActiveAndCache(const Coord& ijk, AccT& acc) const + { + const uint32_t n = CoordToOffset(ijk); + if (mChildMask.isOn(n)) { + acc.insert(ijk, const_cast(mTable[n].child)); + return mTable[n].child->isActiveAndCache(ijk, acc); + } + return mValueMask.isOn(n); } +#endif template uint32_t nodeCount() const { - static_assert(is_same::value, "Node::getNodes: Invalid type"); + static_assert(is_same::value, "Node::getNodes: Invalid type"); NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); uint32_t sum = 0; - if (is_same::value) { //resolved at compile-time + if constexpr(is_same::value) { // resolved at compile-time sum += mChildMask.countOn(); - } else { + } else if constexpr(LEVEL>1) { for (auto iter = mChildMask.beginOn(); iter; ++iter) { sum += mTable[*iter].child->template 
nodeCount(); } @@ -1431,12 +956,12 @@ struct GridBuilder:: template void getNodes(std::vector& array) { - static_assert(is_same::value, "Node::getNodes: Invalid type"); + static_assert(is_same::value, "Node::getNodes: Invalid type"); NANOVDB_ASSERT(NodeT::LEVEL < LEVEL); for (auto iter = mChildMask.beginOn(); iter; ++iter) { - if (is_same::value) { //resolved at compile-time + if constexpr(is_same::value) { // resolved at compile-time array.push_back(reinterpret_cast(mTable[*iter].child)); - } else { + } else if constexpr(LEVEL>1) { mTable[*iter].child->getNodes(array); } } @@ -1455,12 +980,45 @@ struct GridBuilder:: child = nullptr; } + /// @brief Add a tile containing voxel (i, j, k) at the specified tree level, + /// creating a new branch if necessary. Delete any existing lower-level nodes + /// that contain (x, y, z). + /// @tparam level tree level at which the tile is inserted. Must be 1 or 2. + /// @param ijk Index coordinate that map to the tile being inserted + /// @param value Value of the tile + /// @param state Binary state of the tile + template + void addTile(const Coord& ijk, const ValueType& value, bool state) + { + static_assert(level > 0 && level <= LEVEL, "invalid template value of level"); + const uint32_t n = CoordToOffset(ijk); + if constexpr(level == LEVEL) { + if (mChildMask.isOn(n)) { + delete mTable[n].child; + mTable[n] = Tile(value); + } else { + mValueMask.set(n, state); + mTable[n].value = value; + } + } else if constexpr(level < LEVEL) { + ChildT* child = nullptr; + if (mChildMask.isOn(n)) { + child = mTable[n].child; + } else { + child = new ChildT(ijk, value, state); + mTable[n].child = child; + mChildMask.setOn(n); + } + child->template addTile(ijk, value, state); + } + } + template void addNode(NodeT*& node) { - if (is_same::value) { //resolved at compile-time + if constexpr(is_same::value) { //resolved at compile-time this->addChild(reinterpret_cast(node)); - } else { + } else if constexpr(LEVEL>1) { const uint32_t n = 
CoordToOffset(node->mOrigin); ChildT* child = nullptr; if (mChildMask.isOn(n)) { @@ -1474,59 +1032,432 @@ struct GridBuilder:: } } - template - typename std::enable_if::value>::type - signedFloodFill(T outside); - template - typename std::enable_if::value>::type - signedFloodFill(T) {} // no-op for none floating point values -}; // GridBuilder::BuildNode + void merge(InternalNode &other) + { + for (auto iter = other.mChildMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + if (mChildMask.isOn(n)) { + mTable[n].child->merge(*other.mTable[n].child); + } else { + mTable[n].child = other.mTable[n].child; + other.mChildMask.setOff(n); + mChildMask.setOn(n); + } + } + } + + template + typename std::enable_if::value>::type + signedFloodFill(T outside); + +}; // build::InternalNode + +//================================================================================================ + +template +template +inline typename std::enable_if::value>::type +InternalNode::signedFloodFill(T outside) +{ + const uint32_t first = *mChildMask.beginOn(); + if (first < NUM_VALUES) { + bool xInside = mTable[first].child->getFirstValue() < 0; + bool yInside = xInside, zInside = xInside; + for (uint32_t x = 0; x != (1 << LOG2DIM); ++x) { + const uint32_t x00 = x << (2 * LOG2DIM); // offset for block(x, 0, 0) + if (mChildMask.isOn(x00)) { + xInside = mTable[x00].child->getLastValue() < 0; + } + yInside = xInside; + for (uint32_t y = 0; y != (1u << LOG2DIM); ++y) { + const uint32_t xy0 = x00 + (y << LOG2DIM); // offset for block(x, y, 0) + if (mChildMask.isOn(xy0)) + yInside = mTable[xy0].child->getLastValue() < 0; + zInside = yInside; + for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { + const uint32_t xyz = xy0 + z; // offset for block(x, y, z) + if (mChildMask.isOn(xyz)) { + zInside = mTable[xyz].child->getLastValue() < 0; + } else { + mTable[xyz].value = zInside ? 
-outside : outside; + } + } + } + } + } +} // build::InternalNode::signedFloodFill + +// ----------------------------> LeafNode <-------------------------------------- + +template +struct LeafNode +{ + using BuildType = BuildT; + using ValueType = typename BuildToValueMap::type; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr int32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + ValueType mValues[SIZE]; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + 
public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues[BaseT::pos()];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues[mPos];} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType& value, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + ValueType* target = mValues; + uint32_t n = SIZE; + while (n--) { + *target++ = value; + } + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& 
operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& getValueMask() const {return mValueMask;} + const Mask& valueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + } + + void localToGlobalCoord(Coord& ijk) const + { + ijk += mOrigin; + } + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + ValueType getFirstValue() const { return mValues[0]; } + ValueType getLastValue() const { return mValues[SIZE - 1]; } + const ValueType& getValue(uint32_t i) const {return mValues[i];} + const ValueType& getValue(const Coord& ijk) const {return mValues[CoordToOffset(ijk)];} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... 
args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + const ValueType& getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValues[CoordToOffset(ijk)]; + } + + template + void setValueAndCache(const Coord& ijk, const ValueType& value, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues[n] = value; + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif + + void setValue(uint32_t n, const ValueType& value) + { + mValueMask.setOn(n); + mValues[n] = value; + } + void setValue(const Coord& ijk, const ValueType& value){this->setValue(CoordToOffset(ijk), value);} + + void merge(LeafNode &other) + { + other.mValueMask -= mValueMask; + for (auto iter = other.mValueMask.beginOn(); iter; ++iter) { + const uint32_t n = *iter; + mValues[n] = other.mValues[n]; + } + mValueMask |= other.mValueMask; + } + + template + typename std::enable_if::value>::type + signedFloodFill(T outside); + +}; // build::LeafNode + +//================================================================================================ + +template <> +struct LeafNode +{ + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; + static constexpr uint32_t LOG2DIM = 3; + static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes + static constexpr uint32_t DIM = 1u << TOTAL; + static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node + static constexpr int32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t LEVEL = 0; // level 0 = leaf + static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + using 
NodeMaskType = Mask; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; + + Coord mOrigin; + Mask mValueMask; + union { + NanoLeafT *mDstNode; + uint64_t mDstOffset; + }; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return true;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return false;} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, const ValueType&, bool state) + : mOrigin(ijk & ~MASK) + , mValueMask(state) //invalid + , mDstOffset(0) + { + } + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} + + /// @brief Return the linear offset corresponding to the given coordinate + static uint32_t CoordToOffset(const Coord& ijk) + { + return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); + } + + static Coord OffsetToLocalCoord(uint32_t n) + { + NANOVDB_ASSERT(n < SIZE); + const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + } + + void 
localToGlobalCoord(Coord& ijk) const {ijk += mOrigin;} + + Coord offsetToGlobalCoord(uint32_t n) const + { + Coord ijk = LeafNode::OffsetToLocalCoord(n); + this->localToGlobalCoord(ijk); + return ijk; + } + + bool getFirstValue() const { return mValueMask.isOn(0); } + bool getLastValue() const { return mValueMask.isOn(SIZE - 1); } + bool getValue(uint32_t i) const {return mValueMask.isOn(i);} + bool getValue(const Coord& ijk) const {return mValueMask.isOn(CoordToOffset(ijk));} + + template + auto get(const Coord& ijk, ArgsT&&... args) const {return OpT::get(*this, CoordToOffset(ijk), args...);} + + template + auto set(const Coord& ijk, ArgsT&&... args) {return OpT::set(*this, CoordToOffset(ijk), args...);} + +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + template + bool getValueAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } + + template + void setValueAndCache(const Coord& ijk, bool, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + void setValueOnAndCache(const Coord& ijk, const AccT&) + { + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + } + + template + bool isActiveAndCache(const Coord& ijk, const AccT&) const + { + return mValueMask.isOn(CoordToOffset(ijk)); + } +#endif -//================================================================================================ + void setValue(uint32_t n, bool) {mValueMask.setOn(n);} + void setValue(const Coord& ijk) {mValueMask.setOn(CoordToOffset(ijk));} -template -template -template -inline typename std::enable_if::value>::type -GridBuilder::BuildNode:: - signedFloodFill(T outside) -{ - const uint32_t first = *mChildMask.beginOn(); - if (first < NUM_VALUES) { - bool xInside = mTable[first].child->getFirstValue() < 0; - bool yInside = xInside, zInside = xInside; - for (uint32_t x = 0; x != (1 << LOG2DIM); ++x) { - const uint32_t x00 = x << (2 * LOG2DIM); // offset for block(x, 0, 0) - if 
(mChildMask.isOn(x00)) { - xInside = mTable[x00].child->getLastValue() < 0; - } - yInside = xInside; - for (uint32_t y = 0; y != (1u << LOG2DIM); ++y) { - const uint32_t xy0 = x00 + (y << LOG2DIM); // offset for block(x, y, 0) - if (mChildMask.isOn(xy0)) - yInside = mTable[xy0].child->getLastValue() < 0; - zInside = yInside; - for (uint32_t z = 0; z != (1 << LOG2DIM); ++z) { - const uint32_t xyz = xy0 + z; // offset for block(x, y, z) - if (mChildMask.isOn(xyz)) { - zInside = mTable[xyz].child->getLastValue() < 0; - } else { - mTable[xyz].value = zInside ? -outside : outside; - } - } - } - } + void merge(LeafNode &other) + { + mValueMask |= other.mValueMask; } -} // Node::signedFloodFill + +}; // build::LeafNode //================================================================================================ -template -struct GridBuilder:: - BuildLeaf +template <> +struct LeafNode { - using ValueType = ValueT; - using BuildType = BuildT; + using ValueType = bool; + using BuildType = ValueMask; + using LeafNodeType = LeafNode; static constexpr uint32_t LOG2DIM = 3; static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; @@ -1535,32 +1466,89 @@ struct GridBuilder:: static constexpr uint32_t LEVEL = 0; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using NodeMaskType = Mask; - using NanoLeafT = typename NanoNode::Type; + template + using MaskIterT = typename Mask::template Iterator; + using NanoLeafT = typename NanoNode::Type; Coord mOrigin; - Mask mValueMask; - ValueT mValues[SIZE]; + Mask mValueMask, mValues; union { NanoLeafT *mDstNode; uint64_t mDstOffset; }; - BuildLeaf(const Coord& ijk, const ValueT& value, bool state) + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOnIterator() : BaseT(), 
mParent(nullptr) {} + ValueOnIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOn()), mParent(parent) {} + ValueOnIterator& operator=(const ValueOnIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOnIterator + + ValueOnIterator beginValueOn() {return ValueOnIterator(this);} + ValueOnIterator cbeginValueOn() const {return ValueOnIterator(this);} + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode *mParent; + public: + ValueOffIterator() : BaseT(), mParent(nullptr) {} + ValueOffIterator(const LeafNode* parent) : BaseT(parent->mValueMask.beginOff()), mParent(parent) {} + ValueOffIterator& operator=(const ValueOffIterator&) = default; + bool operator*() const {NANOVDB_ASSERT(*this); return mParent->mValues.isOn(BaseT::pos());} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());} + }; // Member class ValueOffIterator + + ValueOffIterator beginValueOff() {return ValueOffIterator(this);} + ValueOffIterator cbeginValueOff() const {return ValueOffIterator(this);} + + /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values + class ValueIterator + { + const LeafNode *mParent; + uint32_t mPos; + public: + ValueIterator() : mParent(nullptr), mPos(1u << 3 * LOG2DIM) {} + ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);} + ValueIterator& operator=(const ValueIterator&) = default; + bool operator*() const { NANOVDB_ASSERT(*this); return mParent->mValues.isOn(mPos);} + Coord getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);} + bool isActive() const { NANOVDB_ASSERT(*this); return mParent->mValueMask.isOn(mPos);} + operator bool() const {return mPos < SIZE;} + ValueIterator& operator++() {++mPos; return *this;} + ValueIterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator + + ValueIterator beginValue() {return ValueIterator(this);} + ValueIterator cbeginValueAll() const {return ValueIterator(this);} + + LeafNode(const Coord& ijk, bool value, bool state) : mOrigin(ijk & ~MASK) - , mValueMask(state) //invalid + , mValueMask(state) + , mValues(value) , mDstOffset(0) { - ValueT* target = mValues; - uint32_t n = SIZE; - while (n--) { - *target++ = value; - } } - BuildLeaf(const BuildLeaf&) = delete; // disallow copy-construction - BuildLeaf(BuildLeaf&&) = delete; // disallow move construction - BuildLeaf& operator=(const BuildLeaf&) = delete; // disallow copy assignment - BuildLeaf& operator=(BuildLeaf&&) = delete; // disallow move assignment - ~BuildLeaf() = default; + LeafNode(const LeafNode&) = delete; // disallow copy-construction + LeafNode(LeafNode&&) = delete; // disallow move construction + LeafNode& operator=(const LeafNode&) = delete; // disallow copy assignment + LeafNode& operator=(LeafNode&&) = delete; // disallow move assignment + ~LeafNode() = default; + + const Mask& valueMask() const {return mValueMask;} + const Mask& getValueMask() const {return mValueMask;} + const Coord& origin() const {return mOrigin;} /// @brief 
Return the linear offset corresponding to the given coordinate static uint32_t CoordToOffset(const Coord& ijk) @@ -1582,74 +1570,68 @@ struct GridBuilder:: Coord offsetToGlobalCoord(uint32_t n) const { - Coord ijk = BuildLeaf::OffsetToLocalCoord(n); + Coord ijk = LeafNode::OffsetToLocalCoord(n); this->localToGlobalCoord(ijk); return ijk; } + bool getFirstValue() const { return mValues.isOn(0); } + bool getLastValue() const { return mValues.isOn(SIZE - 1); } + bool getValue(uint32_t i) const {return mValues.isOn(i);} + bool getValue(const Coord& ijk) const + { + return mValues.isOn(CoordToOffset(ijk)); + } +#ifndef NANOVDB_NEW_ACCESSOR_METHODS template bool isActiveAndCache(const Coord& ijk, const AccT&) const { return mValueMask.isOn(CoordToOffset(ijk)); } - ValueT getFirstValue() const { return mValues[0]; } - ValueT getLastValue() const { return mValues[SIZE - 1]; } - - const ValueT& getValue(const Coord& ijk) const + template + bool getValueAndCache(const Coord& ijk, const AccT&) const { - return mValues[CoordToOffset(ijk)]; + return mValues.isOn(CoordToOffset(ijk)); } template - const ValueT& getValueAndCache(const Coord& ijk, const AccT&) const + void setValueAndCache(const Coord& ijk, bool value, const AccT&) { - return mValues[CoordToOffset(ijk)]; + const uint32_t n = CoordToOffset(ijk); + mValueMask.setOn(n); + mValues.set(n, value);// store the requested value, matching setValue(n, value) below; setOn(n) ignored it and always wrote true } template - void setValueAndCache(const Coord& ijk, const ValueT& value, const AccT&) + void setValueOnAndCache(const Coord& ijk, const AccT&) { const uint32_t n = CoordToOffset(ijk); mValueMask.setOn(n); - mValues[n] = value; } +#endif - void setValue(const Coord& ijk, const ValueT& value) + void setValue(uint32_t n, bool value) { - const uint32_t n = CoordToOffset(ijk); mValueMask.setOn(n); - mValues[n] = value; + mValues.set(n, value); } + void setValue(const Coord& ijk, bool value) {return this->setValue(CoordToOffset(ijk), value);} - template - void getNodes(std::vector&) { NANOVDB_ASSERT(false); } - - template - void
addNode(NodeT*&) {} - - template - uint32_t nodeCount() const + void merge(LeafNode &other) { - NANOVDB_ASSERT(false);// should never get called - return 1; + mValues |= other.mValues; + mValueMask |= other.mValueMask; } - template - typename std::enable_if::value>::type - signedFloodFill(T outside); - template - typename std::enable_if::value>::type - signedFloodFill(T) {} // no-op for none floating point values -}; // BuildLeaf +}; // build::LeafNode //================================================================================================ -template +template template inline typename std::enable_if::value>::type -GridBuilder::BuildLeaf:: - signedFloodFill(T outside) +LeafNode::signedFloodFill(T outside) { const uint32_t first = *mValueMask.beginOn(); if (first < SIZE) { @@ -1675,19 +1657,29 @@ GridBuilder::BuildLeaf:: } } } -} // BuildLeaf::signedFloodFill +} // build::LeafNode::signedFloodFill -//================================================================================================ +// ----------------------------> ValueAccessor <-------------------------------------- -template -struct GridBuilder:: - ValueAccessor +template +struct ValueAccessor { - ValueAccessor(SrcRootT& root) - : mKeys{Coord(Maximum::value()), Coord(Maximum::value()), Coord(Maximum::value())} - , mNode{nullptr, nullptr, nullptr, &root} + using ValueType = typename BuildToValueMap::type; + using LeafT = build::LeafNode; + using Node1 = build::InternalNode; + using Node2 = build::InternalNode; + using RootNodeType = build::RootNode; + using LeafNodeType = typename RootNodeType::LeafNodeType; + + ValueAccessor(RootNodeType& root) + : mRoot(root) + , mKeys{Coord(Maximum::value()), Coord(Maximum::value()), Coord(Maximum::value())} + , mNode{nullptr, nullptr, nullptr} { } + ValueAccessor(ValueAccessor&&) = default; // allow move construction + ValueAccessor(const ValueAccessor&) = delete; // disallow copy construction + ValueType getValue(int i, int j, int k) const {return 
this->getValue(Coord(i,j,k));} template bool isCached(const Coord& ijk) const { @@ -1695,54 +1687,622 @@ struct GridBuilder:: (ijk[1] & ~NodeT::MASK) == mKeys[NodeT::LEVEL][1] && (ijk[2] & ~NodeT::MASK) == mKeys[NodeT::LEVEL][2]; } - const ValueT& getValue(const Coord& ijk) + + template + auto get(const Coord& ijk, ArgsT&&... args) const + { + if (this->template isCached(ijk)) { + return ((const LeafT*)mNode[0])->template get(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((const Node1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((const Node2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot.template getAndCache(ijk, *this, args...); + } + + template + auto set(const Coord& ijk, ArgsT&&... args) const + { + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->template set(ijk, args...); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return mRoot.template setAndCache(ijk, *this, args...); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + ValueType getValue(const Coord& ijk) const {return this->template get>(ijk);} + LeafT* setValue(const Coord& ijk, const ValueType& value) {return this->template set>(ijk, value);} + LeafT* setValueOn(const Coord& ijk) {return this->template set>(ijk);} + LeafT& touchLeaf(const Coord& ijk) {return this->template set>(ijk);} + bool isActive(const Coord& ijk) const {return this->template get>(ijk);} +#else + ValueType getValue(const Coord& ijk) const { - if (this->isCached(ijk)) { - return ((SrcNode0*)mNode[0])->getValueAndCache(ijk, *this); - } else if (this->isCached(ijk)) { - return ((SrcNode1*)mNode[1])->getValueAndCache(ijk, *this); - } else if (this->isCached(ijk)) { - return ((SrcNode2*)mNode[2])->getValueAndCache(ijk, 
*this); + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->getValueAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->getValueAndCache(ijk, *this); } - return ((SrcRootT*)mNode[3])->getValueAndCache(ijk, *this); + return mRoot.getValueAndCache(ijk, *this); } + /// @brief Sets value in a leaf node and returns it. - SrcNode0* setValue(const Coord& ijk, const ValueT& value) - { - if (this->isCached(ijk)) { - ((SrcNode0*)mNode[0])->setValueAndCache(ijk, value, *this); - } else if (this->isCached(ijk)) { - ((SrcNode1*)mNode[1])->setValueAndCache(ijk, value, *this); - } else if (this->isCached(ijk)) { - ((SrcNode2*)mNode[2])->setValueAndCache(ijk, value, *this); + LeafT* setValue(const Coord& ijk, const ValueType& value) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueAndCache(ijk, value, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueAndCache(ijk, value, *this); + } else { + mRoot.setValueAndCache(ijk, value, *this); + } + NANOVDB_ASSERT(this->isCached(ijk)); + return (LeafT*)mNode[0]; + } + void setValueOn(const Coord& ijk) + { + if (this->template isCached(ijk)) { + ((LeafT*)mNode[0])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->setValueOnAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->setValueOnAndCache(ijk, *this); + } else { + mRoot.setValueOnAndCache(ijk, *this); + } + } + void touchLeaf(const Coord& ijk) const + { + if (this->template isCached(ijk)) { + return; + } else if (this->template isCached(ijk)) { + ((Node1*)mNode[1])->touchLeafAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + ((Node2*)mNode[2])->touchLeafAndCache(ijk, 
*this); } else { - ((SrcRootT*)mNode[3])->setValueAndCache(ijk, value, *this); + mRoot.touchLeafAndCache(ijk, *this); } - NANOVDB_ASSERT(this->isCached(ijk)); - return (SrcNode0*)mNode[0]; } - bool isActive(const Coord& ijk) + bool isActive(const Coord& ijk) const { - if (this->isCached(ijk)) { - return ((SrcNode0*)mNode[0])->isActiveAndCache(ijk, *this); - } else if (this->isCached(ijk)) { - return ((SrcNode1*)mNode[1])->isActiveAndCache(ijk, *this); - } else if (this->isCached(ijk)) { - return ((SrcNode2*)mNode[2])->isActiveAndCache(ijk, *this); + if (this->template isCached(ijk)) { + return ((LeafT*)mNode[0])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node1*)mNode[1])->isActiveAndCache(ijk, *this); + } else if (this->template isCached(ijk)) { + return ((Node2*)mNode[2])->isActiveAndCache(ijk, *this); } - return ((SrcRootT*)mNode[3])->isActiveAndCache(ijk, *this); + return mRoot.isActiveAndCache(ijk, *this); } - bool isValueOn(const Coord& ijk) { return this->isActive(ijk); } +#endif + + bool isValueOn(const Coord& ijk) const { return this->isActive(ijk); } template - void insert(const Coord& ijk, NodeT* node) + void insert(const Coord& ijk, NodeT* node) const { mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; mNode[NodeT::LEVEL] = node; } - Coord mKeys[3]; - void* mNode[4]; -}; // ValueAccessor + RootNodeType& mRoot; + mutable Coord mKeys[3]; + mutable void* mNode[3]; +}; // build::ValueAccessor + +// ----------------------------> Tree <-------------------------------------- + +template +struct Tree +{ + using ValueType = typename BuildToValueMap::type; + using Node0 = build::LeafNode; + using Node1 = build::InternalNode; + using Node2 = build::InternalNode; + using RootNodeType = build::RootNode; + using LeafNodeType = typename RootNodeType::LeafNodeType; + struct WriteAccessor; + + RootNodeType mRoot; + std::mutex mMutex; + + Tree(const ValueType &background) : mRoot(background) {} + Tree(const Tree&) = delete; // disallow 
copy construction + Tree(Tree&&) = delete; // disallow move construction + Tree& tree() {return *this;} + RootNodeType& root() {return mRoot;} + ValueType getValue(const Coord& ijk) const {return mRoot.getValue(ijk);} + ValueType getValue(int i, int j, int k) const {return this->getValue(Coord(i,j,k));} + void setValue(const Coord& ijk, const ValueType &value) {mRoot.setValue(ijk, value);} + std::array nodeCount() const + { + std::array count{0,0,0}; + mRoot.nodeCount(count); + return count; + } + /// @brief regular accessor for thread-safe reading and non-thread-safe writing + ValueAccessor getAccessor() { return ValueAccessor(mRoot); } + /// @brief special accessor for thread-safe writing only + WriteAccessor getWriteAccessor() { return WriteAccessor(mRoot, mMutex); } +};// build::Tree + +// ----------------------------> Tree::WriteAccessor <-------------------------------------- + +template +struct Tree::WriteAccessor +{ + using AccT = ValueAccessor; + using ValueType = typename AccT::ValueType; + using LeafT = typename AccT::LeafT; + using Node1 = typename AccT::Node1; + using Node2 = typename AccT::Node2; + using RootNodeType = typename AccT::RootNodeType; + + WriteAccessor(RootNodeType& parent, std::mutex &mx) + : mParent(parent) + , mRoot(parent.mBackground) + , mAcc(mRoot) + , mMutex(mx) + { + } + WriteAccessor(const WriteAccessor&) = delete; // disallow copy construction + WriteAccessor(WriteAccessor&&) = default; // allow move construction + ~WriteAccessor() { this->merge(); } + void merge() + { + mMutex.lock(); + mParent.merge(mRoot); + mMutex.unlock(); + } + inline void setValueOn(const Coord& ijk) {mAcc.setValueOn(ijk);} + inline void setValue(const Coord& ijk, const ValueType &value) {mAcc.setValue(ijk, value);} + + RootNodeType &mParent, mRoot; + AccT mAcc; + std::mutex &mMutex; +}; // build::Tree::WriteAccessor + +// ----------------------------> Grid <-------------------------------------- + +template +struct Grid : public Tree +{ + using BuildType 
= BuildT; + using ValueType = typename BuildToValueMap::type; + using TreeType = Tree; + using Node0 = build::LeafNode; + using Node1 = build::InternalNode; + using Node2 = build::InternalNode; + using RootNodeType = build::RootNode; + + GridClass mGridClass; + GridType mGridType; + Map mMap; + std::string mName; + + Grid(const ValueType &background, const std::string &name = "", GridClass gClass = GridClass::Unknown) + : TreeType(background) + , mGridClass(gClass) + , mGridType(mapToGridType()) + , mName(name) + { + mMap.set(1.0, Vec3d(0.0), 1.0); + } + TreeType& tree() {return *this;} + const GridType& gridType() const { return mGridType; } + const GridClass& gridClass() const { return mGridClass; } + const Map& map() const { return mMap; } + void setTransform(double scale=1.0, const Vec3d &translation = Vec3d(0.0)) {mMap.set(scale, translation, 1.0);} + const std::string& gridName() const { return mName; } + const std::string& getName() const { return mName; } + void setName(const std::string &name) { mName = name; } + /// @brief Sets grid values in the domain of the @a bbox to those returned by the specified @a func with the + /// expected signature [](const Coord&)->ValueType. + /// + /// @note If @a func returns a value equal to the background value of the input grid at a + /// specific voxel coordinate, then the active state of that coordinate is off! Else the + /// value is set and the active state is on. This is done to allow for sparse grids to be generated. + /// + /// @param func Functor used to evaluate the grid values in the @a bbox + /// @param bbox Coordinate bounding-box over which the grid values will be set. + /// @param delta Specifies a lower threshold value for rendering (optional). Typically equals the voxel size + /// for level sets and otherwise it's zero.
+ template + void operator()(const Func& func, const CoordBBox& bbox, ValueType delta = ValueType(0)); +};// build::Grid + +template +template +void Grid::operator()(const Func& func, const CoordBBox& bbox, ValueType delta) +{ + auto &root = this->tree().root(); +#if __cplusplus >= 201703L + static_assert(is_same::type>::value, "GridBuilder: mismatched ValueType"); +#else// invoke_result was introduced in C++17 and result_of was removed in C++20 + static_assert(is_same::type>::value, "GridBuilder: mismatched ValueType"); +#endif + const CoordBBox leafBBox(bbox[0] >> Node0::TOTAL, bbox[1] >> Node0::TOTAL); + std::mutex mutex; + forEach(leafBBox, [&](const CoordBBox& b) { + Node0* leaf = nullptr; + for (auto it = b.begin(); it; ++it) { + Coord min(*it << Node0::TOTAL), max(min + Coord(Node0::DIM - 1)); + const CoordBBox b(min.maxComponent(bbox.min()), + max.minComponent(bbox.max()));// crop + if (leaf == nullptr) { + leaf = new Node0(b[0], root.mBackground, false); + } else { + leaf->mOrigin = b[0] & ~Node0::MASK; + NANOVDB_ASSERT(leaf->mValueMask.isOff()); + } + leaf->mDstOffset = 0;// no prune + for (auto ijk = b.begin(); ijk; ++ijk) { + const auto v = func(*ijk);// call functor + if (v != root.mBackground) leaf->setValue(*ijk, v);// don't insert background values + } + if (!leaf->mValueMask.isOff()) {// has active values + if (leaf->mValueMask.isOn()) {// only active values + const auto first = leaf->getFirstValue(); + int n=1; + while (n<512) {// 8^3 = 512 + if (leaf->mValues[n] != first) break; ++n;// advance only past a match, so n == 512 means every voxel equals first (a mismatch at index 511 no longer prunes) + } + if (n == 512) leaf->mDstOffset = 1;// prune below + } + std::lock_guard guard(mutex); + NANOVDB_ASSERT(leaf != nullptr); + root.addNode(leaf); + NANOVDB_ASSERT(leaf == nullptr); + } + }// loop over sub-part of leafBBox + if (leaf) delete leaf; + }); + + // Prune leaf and tile nodes + for (auto it2 = root.mTable.begin(); it2 != root.mTable.end(); ++it2) { + if (auto *upper = it2->second.child) {//upper level internal node + for (auto it1 =
upper->mChildMask.beginOn(); it1; ++it1) { + auto *lower = upper->mTable[*it1].child;// lower level internal node + for (auto it0 = lower->mChildMask.beginOn(); it0; ++it0) { + auto *leaf = lower->mTable[*it0].child;// leaf nodes + if (leaf->mDstOffset) { + lower->mTable[*it0].value = leaf->getFirstValue(); + lower->mChildMask.setOff(*it0); + lower->mValueMask.setOn(*it0); + delete leaf; + } + }// loop over leaf nodes + if (lower->mChildMask.isOff()) {//only tiles + const auto first = lower->getFirstValue(); + int n=1; + while (n < 4096) {// 16^3 = 4096 + if (lower->mTable[n].value != first) break; ++n;// advance only past a match, so n == 4096 means every tile equals first + } + if (n == 4096) {// identical tile values so prune + upper->mTable[*it1].value = first; + upper->mChildMask.setOff(*it1); + upper->mValueMask.setOn(*it1); + delete lower; + } + } + }// loop over lower internal nodes + if (upper->mChildMask.isOff()) {//only tiles + const auto first = upper->getFirstValue(); + int n=1; + while (n < 32768) {// 32^3 = 32768 + if (upper->mTable[n].value != first) break; ++n;// advance only past a match, so n == 32768 means every tile equals first + } + if (n == 32768) {// identical tile values so prune + it2->second.value = first; + it2->second.state = upper->mValueMask.isOn(); + it2->second.child = nullptr; + delete upper; + } + } + }// is child node of the root + }// loop over root table +}// build::Grid::operator() + +//================================================================================================ + +template +using BuildLeaf = LeafNode; +template +using BuildLower = InternalNode>; +template +using BuildUpper = InternalNode>; +template +using BuildRoot = RootNode>; +template +using BuildTile = typename BuildRoot::Tile; + +using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; +using DoubleGrid = Grid; +using Int32Grid = Grid; +using UInt32Grid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using MaskGrid = Grid; +using IndexGrid = Grid; +using
OnIndexGrid = Grid; +using BoolGrid = Grid; + +// ----------------------------> NodeManager <-------------------------------------- + +// GridT can be openvdb::Grid and nanovdb::build::Grid +template +class NodeManager +{ +public: + + using ValueType = typename GridT::ValueType; + using BuildType = typename GridT::BuildType; + using GridType = GridT; + using TreeType = typename GridT::TreeType; + using RootNodeType = typename TreeType::RootNodeType; + static_assert(RootNodeType::LEVEL == 3, "NodeManager expected LEVEL=3"); + using Node2 = typename RootNodeType::ChildNodeType; + using Node1 = typename Node2::ChildNodeType; + using Node0 = typename Node1::ChildNodeType; + + NodeManager(GridT &grid) : mGrid(grid) {this->init();} + void init() + { + mArray0.clear(); + mArray1.clear(); + mArray2.clear(); + auto counts = mGrid.tree().nodeCount(); + mArray0.reserve(counts[0]); + mArray1.reserve(counts[1]); + mArray2.reserve(counts[2]); + + for (auto it2 = mGrid.tree().root().cbeginChildOn(); it2; ++it2) { + Node2 &upper = const_cast(*it2); + mArray2.emplace_back(&upper); + for (auto it1 = upper.cbeginChildOn(); it1; ++it1) { + Node1 &lower = const_cast(*it1); + mArray1.emplace_back(&lower); + for (auto it0 = lower.cbeginChildOn(); it0; ++it0) { + Node0 &leaf = const_cast(*it0); + mArray0.emplace_back(&leaf); + }// loop over leaf nodes + }// loop over lower internal nodes + }// loop over root node + } + + /// @brief Return the number of tree nodes at the specified level + /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level + uint64_t nodeCount(int level) const + { + NANOVDB_ASSERT(level==0 || level==1 || level==2); + return level==0 ? mArray0.size() : level==1 ? 
mArray1.size() : mArray2.size(); + } + + template + typename enable_if::type node(int i) {return *mArray0[i];} + template + typename enable_if::type node(int i) const {return *mArray0[i];} + template + typename enable_if::type node(int i) {return *mArray1[i];} + template + typename enable_if::type node(int i) const {return *mArray1[i];} + template + typename enable_if::type node(int i) {return *mArray2[i];} + template + typename enable_if::type node(int i) const {return *mArray2[i];} + + /// @brief Return the i'th leaf node with respect to breadth-first ordering + const Node0& leaf(uint32_t i) const { return *mArray0[i]; } + Node0& leaf(uint32_t i) { return *mArray0[i]; } + uint64_t leafCount() const {return mArray0.size();} + + /// @brief Return the i'th lower internal node with respect to breadth-first ordering + const Node1& lower(uint32_t i) const { return *mArray1[i]; } + Node1& lower(uint32_t i) { return *mArray1[i]; } + uint64_t lowerCount() const {return mArray1.size();} + + /// @brief Return the i'th upper internal node with respect to breadth-first ordering + const Node2& upper(uint32_t i) const { return *mArray2[i]; } + Node2& upper(uint32_t i) { return *mArray2[i]; } + uint64_t upperCount() const {return mArray2.size();} + + RootNodeType& root() {return mGrid.tree().root();} + const RootNodeType& root() const {return mGrid.tree().root();} + + TreeType& tree() {return mGrid.tree();} + const TreeType& tree() const {return mGrid.tree();} + + GridType& grid() {return mGrid;} + const GridType& grid() const {return mGrid;} + +protected: + + GridT &mGrid; + std::vector mArray0; // leaf nodes + std::vector mArray1; // lower internal nodes + std::vector mArray2; // upper internal nodes + +};// NodeManager + +template +typename enable_if::value>::type +sdfToLevelSet(NodeManagerT &mgr) +{ + mgr.grid().mGridClass = GridClass::LevelSet; + // Note that the bottom-up flood filling is essential + const auto outside = mgr.root().mBackground; + forEach(0, 
mgr.leafCount(), 8, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.leaf(i).signedFloodFill(outside); + }); + forEach(0, mgr.lowerCount(), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.lower(i).signedFloodFill(outside); + }); + forEach(0, mgr.upperCount(), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) mgr.upper(i).signedFloodFill(outside); + }); + mgr.root().signedFloodFill(outside); +}// sdfToLevelSet + +template +void levelSetToFog(NodeManagerT &mgr, bool rebuild = true) +{ + using ValueType = typename NodeManagerT::ValueType; + mgr.grid().mGridClass = GridClass::FogVolume; + const ValueType d = -mgr.root().mBackground, w = 1.0f / d; + std::atomic_bool prune{false}; + auto op = [&](ValueType& v) -> bool { + if (v > ValueType(0)) { + v = ValueType(0); + return false; + } + v = v > d ? v * w : ValueType(1); + return true; + }; + forEach(0, mgr.leafCount(), 8, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& leaf = mgr.leaf(i); + for (uint32_t i = 0; i < 512u; ++i) leaf.mValueMask.set(i, op(leaf.mValues[i])); + } + }); + forEach(0, mgr.lowerCount(), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.lower(i); + for (uint32_t i = 0; i < 4096u; ++i) { + if (node.mChildMask.isOn(i)) { + auto* leaf = node.mTable[i].child; + if (leaf->mValueMask.isOff()) {// prune leaf node + node.mTable[i].value = leaf->getFirstValue(); + node.mChildMask.setOff(i); + delete leaf; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + forEach(0, mgr.upperCount(), 1, [&](const Range1D& r) { + for (auto i = r.begin(); i != r.end(); ++i) { + auto& node = mgr.upper(i); + for (uint32_t i = 0; i < 32768u; ++i) { + if (node.mChildMask.isOn(i)) {// prune lower internal node + auto* child = node.mTable[i].child; + if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + 
node.mTable[i].value = child->getFirstValue(); + node.mChildMask.setOff(i); + delete child; + prune = true; + } + } else { + node.mValueMask.set(i, op(node.mTable[i].value)); + } + } + } + }); + + for (auto it = mgr.root().mTable.begin(); it != mgr.root().mTable.end(); ++it) { + auto* child = it->second.child; + if (child == nullptr) { + it->second.state = op(it->second.value); + } else if (child->mChildMask.isOff() && child->mValueMask.isOff()) { + it->second.value = child->getFirstValue(); + it->second.state = false; + it->second.child = nullptr; + delete child; + prune = true; + } + } + if (rebuild && prune) mgr.init(); +}// levelSetToFog + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +template +struct TouchLeaf { + static BuildLeaf& set(BuildLeaf &leaf, uint32_t) {return leaf;} +};// TouchLeaf + +/// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. 
+template +struct GetValue { + static auto get(const BuildRoot &root) {return root.mBackground;} + static auto get(const BuildTile &tile) {return tile.value;} + static auto get(const BuildUpper &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLower &node, uint32_t n) {return node.mTable[n].value;} + static auto get(const BuildLeaf &leaf, uint32_t n) {return leaf.getValue(n);} +};// GetValue + +/// @brief Implements Tree::isActive(Coord) +/// @tparam T Build type of the grid being called +template +struct GetState { + static bool get(const BuildRoot&) {return false;} + static bool get(const BuildTile &tile) {return tile.state;} + static bool get(const BuildUpper &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLower &node, uint32_t n) {return node.mValueMask.isOn(n);} + static bool get(const BuildLeaf &leaf, uint32_t n) {return leaf.mValueMask.isOn(n);} +};// GetState + +/// @brief Set the value and its state at the leaf level mapped to by ijk, and create the leaf node and branch if needed. 
+/// @tparam T BuildType of the corresponding tree +template +struct SetValue { + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n) { + leaf.mValueMask.setOn(n);// always set the active bit + return &leaf; + } + static BuildLeaf* set(BuildLeaf &leaf, uint32_t n, const typename BuildLeaf::ValueType &v) { + leaf.setValue(n, v); + return &leaf; + } +};// SetValue + +/// @brief Implements Tree::probeLeaf(Coord) +/// @tparam T Build type of the grid being called +template +struct ProbeValue { + using ValueT = typename BuildLeaf::ValueType; + static bool get(const BuildRoot &root, ValueT &v) { + v = root.mBackground; + return false; + } + static bool get(const BuildTile &tile, ValueT &v) { + v = tile.value; + return tile.state; + } + static bool get(const BuildUpper &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLower &node, uint32_t n, ValueT &v) { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + static bool get(const BuildLeaf &leaf, uint32_t n, ValueT &v) { + v = leaf.getValue(n); + return leaf.isActive(n); + } +};// ProbeValue + +} // namespace build } // namespace nanovdb -#endif // NANOVDB_GRIDBUILDER_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_GRID_BUILDER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/GridChecksum.h b/nanovdb/nanovdb/util/GridChecksum.h index e5672364b8..12b79a1131 100644 --- a/nanovdb/nanovdb/util/GridChecksum.h +++ b/nanovdb/nanovdb/util/GridChecksum.h @@ -22,7 +22,7 @@ #include #include -#include "../NanoVDB.h" +#include #include "GridHandle.h" #include "ForEach.h" #include "NodeManager.h" @@ -58,6 +58,13 @@ bool validateChecksum(const NanoGrid &grid, ChecksumMode mode = Checksum template void updateChecksum(NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); +/// @brief Updates the checksum of the grids encapsulated by a handle +// +/// @param handle Handle with grids whose checksum will be updated. 
+/// @param mode Defines the mode of computation for the checksum. +template +void updateChecksum(GridHandle &handle, ChecksumMode mode = ChecksumMode::Default); + /// @brief Return the CRC32 checksum of the raw @a data of @a size /// @param data The beginning of the raw data. /// @param size Size of the data to bytes! @@ -281,6 +288,87 @@ void updateChecksum(NanoGrid &grid, ChecksumMode mode) grid.data()->mChecksum = cs.checksum(); } +template +void updateChecksum(GridHandle &handle, ChecksumMode mode) +{ + for (uint32_t i = 0; i < handle.gridCount(); ++i) + { + const GridType& gridType = handle.gridType(i); + switch (gridType) + { + case GridType::Float: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Double: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Int16: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Int32: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Int64: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Vec3f: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Vec3d: + updateChecksum(*handle.template grid(i)); + break; + case GridType::UInt32: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Mask: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Index: + updateChecksum(*handle.template grid(i)); + break; + case GridType::OnIndex: + updateChecksum(*handle.template grid(i)); + break; + case GridType::IndexMask: + updateChecksum(*handle.template grid(i)); + break; + case GridType::OnIndexMask: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Boolean: + updateChecksum(*handle.template grid(i)); + break; + case GridType::RGBA8: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Fp4: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Fp8: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Fp16: + 
updateChecksum(*handle.template grid(i)); + break; + case GridType::FpN: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Vec4f: + updateChecksum(*handle.template grid(i)); + break; + case GridType::Vec4d: + updateChecksum(*handle.template grid(i)); + break; + default: + { + std::stringstream ss; + ss << "Cannot update checksum for grid of unknown type \"" << toStr(handle.gridType(i)); + throw std::runtime_error(ss.str() + "\""); + } + } + } +} + } // namespace nanovdb #endif // NANOVDB_GRIDCHECKSUM_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/GridHandle.h b/nanovdb/nanovdb/util/GridHandle.h index 7414314311..511f5071ce 100644 --- a/nanovdb/nanovdb/util/GridHandle.h +++ b/nanovdb/nanovdb/util/GridHandle.h @@ -8,102 +8,82 @@ \date January 8, 2020 - \brief Defines two classes, a GridRegister the defines the value type (e.g. Double, Float etc) - of a NanoVDB grid, and a GridHandle and manages the memory of a NanoVDB grid. - - \note This file has NO dependency on OpenVDB. + \brief Defines GridHandle, which manages a host, and possibly a device, + memory buffer containing one or more NanoVDB grids. */ #ifndef NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED #define NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h"// for mapToGridType +#include +#include +#include // for mapToGridType #include "HostBuffer.h" -namespace nanovdb { - -// --------------------------> GridHandleBase <------------------------------------ - -class GridHandleBase -{ -public: - virtual ~GridHandleBase() {} - - /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle's allocator. - virtual uint64_t size() const = 0; - - virtual uint8_t* data() = 0; - virtual const uint8_t* data() const = 0; - - /// @brief Return true if this handle is empty, i.e. 
has no allocated memory - bool empty() const { return size() == 0; } - - /// @brief Return true if this handle contains a grid - operator bool() const { return !this->empty(); } +#if defined(__CUDACC__) +#include "cuda/CudaUtils.h"// for cudaCheckError(); +#endif - /// @brief Returns a const point to the grid meta data (see definition above). - /// - /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized - const GridMetaData* gridMetaData() const { return reinterpret_cast(data()); } - - /// @brief Returns the GridType handled by this instance, and GridType::End if empty - GridType gridType() const - { - const GridMetaData* ptr = this->gridMetaData(); - return ptr ? ptr->gridType() : GridType::End; - } - - /// @brief Return the number of grids contained in this buffer - uint32_t gridCount() const - { - auto *ptr = this->gridMetaData(); - return ptr ? ptr->gridCount() : 0; - } -};// GridHandleBase +namespace nanovdb { // --------------------------> GridHandle <------------------------------------ -/// @brief This class serves to manage a raw memory buffer of a NanoVDB Grid. +struct GridHandleMetaData {uint64_t offset, size; GridType gridType;}; + +/// @brief This class serves to manage a buffer containing one or more NanoVDB Grids. /// /// @note It is important to note that this class does NOT depend on OpenVDB. 
template -class GridHandle : public GridHandleBase +class GridHandle { + std::vector mMetaData; BufferT mBuffer; - template - const NanoGrid* getGrid(uint32_t n = 0) const; - - template - typename std::enable_if::hasDeviceDual, const NanoGrid*>::type - getDeviceGrid(uint32_t n = 0) const; - template static T* no_const(const T* ptr) { return const_cast(ptr); } public: using BufferType = BufferT; - /// @brief Move constructor from a buffer - GridHandle(BufferT&& buffer) { mBuffer = std::move(buffer); } - /// @brief Empty ctor + /// @brief Move constructor from a buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + GridHandle(BufferT&& buffer); + + /// @brief Constructs an empty GridHandle GridHandle() = default; + /// @brief Disallow copy-construction GridHandle(const GridHandle&) = delete; + + /// @brief Move copy-constructor + GridHandle(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); + } + + /// @brief clear this GridHandle to an empty handle + void reset() { + mBuffer.clear(); + mMetaData.clear(); + } + /// @brief Disallow copy assignment operation GridHandle& operator=(const GridHandle&) = delete; + /// @brief Move copy assignment operation - GridHandle& operator=(GridHandle&& other) noexcept - { - mBuffer = std::move(other.mBuffer); + GridHandle& operator=(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); return *this; } - /// @brief Move copy-constructor - GridHandle(GridHandle&& other) noexcept { mBuffer = std::move(other.mBuffer); } - /// @brief Default destructor - ~GridHandle() override { reset(); } - /// @brief clear the buffer - void reset() { mBuffer.clear(); } + + /// @brief Performs a deep copy of the GridHandle, possibly templated on a different buffer type + /// @tparam OtherBufferT Buffer 
type of the deep copy + /// @param buffer optional buffer used for allocation + /// @return A new handle of the specified buffer type that contains a deep copy of the current handle + template + GridHandle copy(const OtherBufferT& buffer = OtherBufferT()) const; /// @brief Return a reference to the buffer BufferT& buffer() { return mBuffer; } @@ -112,101 +92,300 @@ class GridHandle : public GridHandleBase const BufferT& buffer() const { return mBuffer; } /// @brief Returns a non-const pointer to the data. - /// /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized - uint8_t* data() override { return mBuffer.data(); } + uint8_t* data() { return mBuffer.data(); } /// @brief Returns a const pointer to the data. - /// /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized - const uint8_t* data() const override { return mBuffer.data(); } + const uint8_t* data() const { return mBuffer.data(); } - /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle's allocator. - uint64_t size() const override { return mBuffer.size(); } + template + typename enable_if::hasDeviceDual, const uint8_t*>::type + deviceData() const { return mBuffer.deviceData(); } + template + typename enable_if::hasDeviceDual, uint8_t*>::type + deviceData() { return mBuffer.deviceData(); } + + /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle. + uint64_t size() const { return mBuffer.size(); } - /// @brief Returns a const pointer to the @a n'th NanoVDB grid encoded in this GridHandle. - /// - /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid + /// @brief Return true if this handle is empty, i.e. 
has no allocated memory + bool empty() const { return this->size() == 0; } + + /// @brief Return true if this handle contains any grids + operator bool() const { return !this->empty(); } + + /// @brief Returns a const host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle no host grid, @a n is invalid /// or if the template parameter does not match the specified grid! template - const NanoGrid* grid(uint32_t n = 0) const { return this->template getGrid(n); } + const NanoGrid* grid(uint32_t n = 0) const; - /// @brief Returns a pointer to the @a n'th NanoVDB grid encoded in this GridHandle. - /// - /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid + /// @brief Returns a host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle no host grid, @a n is invalid /// or if the template parameter does not match the specified grid! template - NanoGrid* grid(uint32_t n = 0) { return no_const(this->template getGrid(n)); } + NanoGrid* grid(uint32_t n = 0) {return const_cast*>(static_cast(this)->template grid(n));} /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU - /// - /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid - /// or if the template parameter does not match the specified grid! 
+ /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (device) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle has no device grid, @a n is invalid, + /// or if the template parameter does not match the specified grid. template - typename std::enable_if::hasDeviceDual, const NanoGrid*>::type - deviceGrid(uint32_t n = 0) const { return this->template getDeviceGrid(n); } + typename enable_if::hasDeviceDual, const NanoGrid*>::type + deviceGrid(uint32_t n=0) const; /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU - /// - /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid - /// or if the template parameter does not match the specified grid! + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index if of the grid pointer to be returned + /// @param verbose if non-zero error messages will be printed in case something failed + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid, + /// or if the template parameter does not match the specified grid. template - typename std::enable_if::hasDeviceDual, NanoGrid*>::type - deviceGrid(uint32_t n = 0) { return no_const(this->template getDeviceGrid(n)); } + typename enable_if::hasDeviceDual, NanoGrid*>::type + deviceGrid(uint32_t n=0){return const_cast*>(static_cast(this)->template deviceGrid(n));} /// @brief Upload the grid to the device, e.g. from CPU to GPU - /// /// @note This method is only available if the buffer supports devices template - typename std::enable_if::hasDeviceDual, void>::type + typename enable_if::hasDeviceDual, void>::type deviceUpload(void* stream = nullptr, bool sync = true) { mBuffer.deviceUpload(stream, sync); } /// @brief Download the grid to from the device, e.g. 
from GPU to CPU - /// /// @note This method is only available if the buffer supports devices template - typename std::enable_if::hasDeviceDual, void>::type + typename enable_if::hasDeviceDual, void>::type deviceDownload(void* stream = nullptr, bool sync = true) { mBuffer.deviceDownload(stream, sync); } + + /// @brief Check if the buffer is this handle has any padding, i.e. if the buffer is larger than the combined size of all its grids + /// @return true is the combined size of all grid is smaller than the buffer size + bool isPadded() const {return mMetaData.empty() ? false : mMetaData.back().offset + mMetaData.back().size != mBuffer.size();} + + /// @brief Return the total number of grids contained in this buffer + uint32_t gridCount() const {return mMetaData.size();} + + /// @brief Return the grid size of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the byte size of the specified grid + uint64_t gridSize(uint32_t n = 0) const {return mMetaData[n].size; } + + /// @brief Return the GridType of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the GridType of the specified grid + GridType gridType(uint32_t n = 0) const {return mMetaData[n].gridType; } + + /// @brief Access to the GridData of the n'th grid in the current handle + /// @param n zero-based ID of the grid + /// @return Const pointer to the n'th GridData in the current handle + const GridData* gridData(uint32_t n = 0) const; + + /// @brief Returns a const point to the @a n'th grid meta data + /// @param n zero-based ID of the grid + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + const GridMetaData* gridMetaData(uint32_t n = 0) const; }; // GridHandle // --------------------------> Implementation of private methods in GridHandle <------------------------------------ template -template -inline const 
NanoGrid* GridHandle::getGrid(uint32_t index) const +inline const GridData* GridHandle::gridData(uint32_t n) const { - using GridT = const NanoGrid; - auto *data = mBuffer.data(); - GridT *grid = reinterpret_cast(data); - if (grid == nullptr || index >= grid->gridCount()) {// un-initialized or index is out of range - return nullptr; + const uint8_t *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return reinterpret_cast(data + mMetaData[n].offset); +} + +template +inline const GridMetaData* GridHandle::gridMetaData(uint32_t n) const +{ + const uint8_t *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return reinterpret_cast(data + mMetaData[n].offset); +} + +namespace {// anonymous namespace +inline __hostdev__ void cpyMetaData(const GridData *data, GridHandleMetaData *meta) +{ + uint64_t offset = 0; + for (auto *p=meta, *q=p+data->mGridCount; p!=q; ++p) { + *p = {offset, data->mGridSize, data->mGridType}; + offset += p->size; + data = PtrAdd(data, p->size); } - while(index != grid->gridIndex()) { - data += grid->gridSize(); - grid = reinterpret_cast(data); +} +#if defined(__CUDACC__) +__global__ void cudaCpyMetaData(const GridData *data, GridHandleMetaData *meta){cpyMetaData(data, meta);} +#endif +}// anonymous namespace + +template +GridHandle::GridHandle(BufferT&& buffer) +{ + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyMetaData(data, mMetaData.data()); +#if defined(__CUDACC__) + } else if constexpr(BufferTraits::hasDeviceDual) { + if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { + GridData tmp; + cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); + if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); + 
GridHandleMetaData *d_metaData; + cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); + cudaCpyMetaData<<<1,1>>>(d_data, d_metaData); + mMetaData.resize(tmp.mGridCount); + cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_metaData)); + } +#endif } - return grid->gridType() == mapToGridType() ? grid : nullptr; +} + +template +template +inline GridHandle GridHandle::copy(const OtherBufferT& other) const +{ + if (mBuffer.isEmpty()) return GridHandle();// return an empty handle + auto buffer = OtherBufferT::create(mBuffer.size(), &other); + std::memcpy(buffer.data(), mBuffer.data(), mBuffer.size());// deep copy of buffer + return GridHandle(std::move(buffer)); +} + +template +template +inline const NanoGrid* GridHandle::grid(uint32_t n) const +{ + const uint8_t *data = mBuffer.data(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; + return reinterpret_cast*>(data + mMetaData[n].offset); } template template -inline typename std::enable_if::hasDeviceDual, const NanoGrid*>::type -GridHandle::getDeviceGrid(uint32_t index) const +inline typename enable_if::hasDeviceDual, const NanoGrid*>::type +GridHandle::deviceGrid(uint32_t n) const +{ + const uint8_t *data = mBuffer.deviceData(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; + return reinterpret_cast*>(data + mMetaData[n].offset); +} + +// --------------------------> free-standing functions <------------------------------------ + +namespace {// anonymous namespace +inline __hostdev__ void updateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount) +{ + data->mGridIndex = gridIndex; + data->mGridCount = gridCount; +} +#if defined(__CUDACC__) +__global__ void cudaUpdateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount){updateGridData(data, gridIndex, gridCount);} 
+#endif +}// anonymous namespace + +template class VectorT = std::vector> +inline VectorT> +splitGrids(const GridHandle &handle, const BufferT* other = nullptr) +{ + using HandleT = GridHandle; + const uint8_t *ptr = handle.data(); + if (ptr == nullptr) return VectorT(); + VectorT handles(handle.gridCount()); + for (auto &h : handles) { + const GridData *src = reinterpret_cast(ptr); + NANOVDB_ASSERT(src->isValid()); + auto buffer = BufferT::create(src->mGridSize, other); + GridData *dst = reinterpret_cast(buffer.data()); + std::memcpy(dst, src, src->mGridSize); + updateGridData(dst, 0u, 1u); + h = HandleT(std::move(buffer)); + ptr += src->mGridSize; + } + return std::move(handles); +}// splitGrids + +template class VectorT> +inline GridHandle +mergeGrids(const VectorT> &handles, const BufferT* other = nullptr) { - using GridT = const NanoGrid; - auto *data = mBuffer.data(); - GridT *grid = reinterpret_cast(data); - if (grid == nullptr || index >= grid->gridCount()) {// un-initialized or index is out of range - return nullptr; + uint64_t size = 0u; + uint32_t counter = 0u, gridCount = 0u; + for (auto &h : handles) { + gridCount += h.gridCount(); + for (uint32_t n=0; ngridIndex()) { - data += grid->gridSize(); - dev += grid->gridSize(); - grid = reinterpret_cast(data); + auto buffer = BufferT::create(size, other); + uint8_t *dst = buffer.data(); + for (auto &h : handles) { + const uint8_t *src = h.data(); + for (uint32_t n=0; n(dst); + NANOVDB_ASSERT(data->isValid()); + updateGridData(data, counter++, gridCount); + dst += data->mGridSize; + src += data->mGridSize; + } } - return grid->gridType() == mapToGridType() ? 
reinterpret_cast(dev) : nullptr; -} + return GridHandle(std::move(buffer)); +}// mergeGrids + +#if defined(__CUDACC__) + +template class VectorT = std::vector> +inline typename enable_if::hasDeviceDual, VectorT>>::type +splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr) +{ + const uint8_t *ptr = handle.deviceData(); + if (ptr == nullptr) return VectorT>(); + VectorT> handles(handle.gridCount()); + for (uint32_t n=0; n(buffer.deviceData()); + const GridData *src = reinterpret_cast(ptr); + cudaCheck(cudaMemcpy(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice)); + cudaUpdateGridData<<<1,1>>>(dst, 0u, 1u); + cudaCheckError(); + handles[n] = GridHandle(std::move(buffer)); + ptr += handle.gridSize(n); + } + return std::move(handles); +}// splitDeviceGrids + +template class VectorT> +inline typename enable_if::hasDeviceDual, GridHandle>::type +mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr) +{ + uint64_t size = 0u; + uint32_t counter = 0u, gridCount = 0u; + for (auto &h : handles) { + gridCount += h.gridCount(); + for (uint32_t n=0; n(dst); + cudaUpdateGridData<<<1,1>>>(data, counter++, gridCount); + cudaCheckError(); + dst += h.gridSize(n); + src += h.gridSize(n); + } + } + return GridHandle(std::move(buffer)); +}// mergeDeviceGrids + +#endif// defined(__CUDACC__) } // namespace nanovdb diff --git a/nanovdb/nanovdb/util/GridStats.h b/nanovdb/nanovdb/util/GridStats.h index a98e1a5ed1..df13104470 100644 --- a/nanovdb/nanovdb/util/GridStats.h +++ b/nanovdb/nanovdb/util/GridStats.h @@ -15,7 +15,7 @@ #ifndef NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED #define NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h" +#include #include "Range.h" #include "ForEach.h" @@ -533,12 +533,12 @@ void GridStats::process( GridT &grid ) auto& data = *grid.data(); const auto& indexBBox = grid.tree().root().bbox(); if (indexBBox.empty()) { - data.mWorldBBox = BBox(); + data.mWorldBBox = BBox(); data.setBBoxOn(false); } else { // Note 
that below max is offset by one since CoordBBox.max is inclusive - // while bbox.max is exclusive. However, min is inclusive in both - // CoordBBox and BBox. This also guarantees that a grid with a single + // while bbox.max is exclusive. However, min is inclusive in both + // CoordBBox and BBox. This also guarantees that a grid with a single // active voxel, does not have an empty world bbox! E.g. if a grid with a // unit index-to-world transformation only contains the active voxel (0,0,0) // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) @@ -547,16 +547,16 @@ void GridStats::process( GridT &grid ) const Coord min = indexBBox[0]; const Coord max = indexBBox[1] + Coord(1); - auto& worldBBox = data.mWorldBBox; + auto& wBBox = data.mWorldBBox; const auto& map = grid.map(); - worldBBox[0] = worldBBox[1] = map.applyMap(Vec3d(min[0], min[1], min[2])); - worldBBox.expand(map.applyMap(Vec3d(min[0], min[1], max[2]))); - worldBBox.expand(map.applyMap(Vec3d(min[0], max[1], min[2]))); - worldBBox.expand(map.applyMap(Vec3d(max[0], min[1], min[2]))); - worldBBox.expand(map.applyMap(Vec3d(max[0], max[1], min[2]))); - worldBBox.expand(map.applyMap(Vec3d(max[0], min[1], max[2]))); - worldBBox.expand(map.applyMap(Vec3d(min[0], max[1], max[2]))); - worldBBox.expand(map.applyMap(Vec3d(max[0], max[1], max[2]))); + wBBox[0] = wBBox[1] = map.applyMap(Vec3d(min[0], min[1], min[2])); + wBBox.expand(map.applyMap(Vec3d(min[0], min[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(min[0], max[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], min[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], max[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], min[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(min[0], max[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], max[1], max[2]))); data.setBBoxOn(true); } @@ -604,7 +604,7 @@ void GridStats::process(RootT &root) } this->setStats(&data, total.stats); if (total.bbox.empty()) { - std::cerr << 
"\nWarning: input tree only contained inactive root tiles!" + std::cerr << "\nWarning in GridStats: input tree only contained inactive root tiles!" << "\nWhile not strictly an error it's rather suspicious!\n"; } //data.mActiveVoxelCount = total.activeCount; diff --git a/nanovdb/nanovdb/util/GridValidator.h b/nanovdb/nanovdb/util/GridValidator.h index 961074268f..85234b03ad 100644 --- a/nanovdb/nanovdb/util/GridValidator.h +++ b/nanovdb/nanovdb/util/GridValidator.h @@ -14,7 +14,7 @@ #ifndef NANOVDB_GRIDVALIDATOR_H_HAS_BEEN_INCLUDED #define NANOVDB_GRIDVALIDATOR_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h" +#include #include "GridChecksum.h" namespace nanovdb { @@ -146,7 +146,7 @@ void GridValidator::checkNodes(const GridT &grid, std::string &errorStr) return errorStr.empty(); }; - for (auto it2 = grid.tree().root().beginChild(); it2; ++it2) { + for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { auto &node2 = *it2; if (!check(&node2, sizeof(node2))) return; for (auto it1 = node2.beginChild(); it1; ++it1) { diff --git a/nanovdb/nanovdb/util/HostBuffer.h b/nanovdb/nanovdb/util/HostBuffer.h index 7994d1a766..e0520d6983 100644 --- a/nanovdb/nanovdb/util/HostBuffer.h +++ b/nanovdb/nanovdb/util/HostBuffer.h @@ -77,16 +77,16 @@ #ifndef NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED #define NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h"// for NANOVDB_DATA_ALIGNMENT; -#include // for types like int32_t etc -#include // for fprintf -#include // for std::malloc/std::realloc/std::free -#include // for std::make_shared -#include // for std::mutex -#include //for std::unordered_set -#include // for assert -#include // for std::stringstream -#include // for memcpy +#include // for NANOVDB_DATA_ALIGNMENT; +#include // for types like int32_t etc +#include // for fprintf +#include // for std::malloc/std::realloc/std::free +#include // for std::make_shared +#include // for std::mutex +#include // for std::unordered_set +#include // for assert +#include // for 
std::stringstream +#include // for memcpy #define checkPtr(ptr, msg) \ { \ @@ -98,7 +98,7 @@ namespace nanovdb { template struct BufferTraits { - static const bool hasDeviceDual = false; + static constexpr bool hasDeviceDual = false; }; // ----------------------------> HostBuffer <-------------------------------------- diff --git a/nanovdb/nanovdb/util/IO.h b/nanovdb/nanovdb/util/IO.h index 32246623aa..f89183d84b 100644 --- a/nanovdb/nanovdb/util/IO.h +++ b/nanovdb/nanovdb/util/IO.h @@ -18,8 +18,9 @@ #ifndef NANOVDB_IO_H_HAS_BEEN_INCLUDED #define NANOVDB_IO_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h" +#include #include "GridHandle.h" +#include "GridChecksum.h" #include // for std::ifstream #include // for std::cerr/cout @@ -76,11 +77,13 @@ namespace Internal { static constexpr fileSize_t MAX_SIZE = 1UL << 30; // size is 1 GB template -static fileSize_t write(std::ostream& os, const GridHandle& handle, Codec codec); +static fileSize_t write(std::ostream& os, const GridHandle& handle, Codec codec, uint32_t n); template -static void read(std::istream& is, GridHandle& handle, Codec codec); -}; // namespace Internal +static void read(std::istream& is, BufferT& buffer, Codec codec); + +static void read(std::istream& is, char* data, fileSize_t size, Codec codec); +} // namespace Internal /// @brief Standard hash function to use on strings; std::hash may vary by /// platform/implementation and is know to produce frequent collisions. 
@@ -96,8 +99,8 @@ inline uint64_t stringHash(const std::string& str) inline uint64_t reverseEndianness(uint64_t val) { return (((val) >> 56) & 0x00000000000000FF) | (((val) >> 40) & 0x000000000000FF00) | - (((val) >> 24) & 0x0000000000FF0000) | (((val) >> 8) & 0x00000000FF000000) | - (((val) << 8) & 0x000000FF00000000) | (((val) << 24) & 0x0000FF0000000000) | + (((val) >> 24) & 0x0000000000FF0000) | (((val) >> 8) & 0x00000000FF000000) | + (((val) << 8) & 0x000000FF00000000) | (((val) << 24) & 0x0000FF0000000000) | (((val) << 40) & 0x00FF000000000000) | (((val) << 56) & 0xFF00000000000000); } @@ -148,7 +151,7 @@ struct MetaData GridClass gridClass; // 4B. BBox worldBBox; // 2 * 3 * 8 = 48B. CoordBBox indexBBox; // 2 * 3 * 4 = 24B. - Vec3R voxelSize; // 24B. + Vec3d voxelSize; // 24B. uint32_t nameSize; // 4B. uint32_t nodeCount[4]; //4 x 4 = 16B uint32_t tileCount[3];// 3 x 4 = 12B @@ -252,10 +255,10 @@ std::vector readGridMetaData(std::istream& is); // --------------------------> Implementations for Internal <------------------------------------ template -fileSize_t Internal::write(std::ostream& os, const GridHandle& handle, Codec codec) +fileSize_t Internal::write(std::ostream& os, const GridHandle& handle, Codec codec, unsigned int n) { - const char* data = reinterpret_cast(handle.data()); - fileSize_t total = 0, residual = handle.size(); + const char* data = reinterpret_cast(handle.gridData(n)); + fileSize_t total = 0, residual = handle.gridSize(n); switch (codec) { case Codec::ZIP: { @@ -300,18 +303,18 @@ fileSize_t Internal::write(std::ostream& os, const GridHandle& handle, os.write(data, residual); total += residual; } - if (!os) { - throw std::runtime_error("Failed to write Tree to file"); - } + if (!os) throw std::runtime_error("Failed to write Tree to file"); return total; } // Internal::write template -void Internal::read(std::istream& is, GridHandle& handle, Codec codec) +void Internal::read(std::istream& is, BufferT& buffer, Codec codec) { - char* data 
= reinterpret_cast(handle.buffer().data()); - fileSize_t residual = handle.buffer().size(); + Internal::read(is, reinterpret_cast(buffer.data()), buffer.size(), codec); +} // Internal::read +void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec codec) +{ // read tree using optional compression switch (codec) { case Codec::ZIP: { @@ -355,9 +358,7 @@ void Internal::read(std::istream& is, GridHandle& handle, Codec codec) default: is.read(data, residual); } - if (!is) { - throw std::runtime_error("Failed to read Tree from file"); - } + if (!is) throw std::runtime_error("Failed to read Tree from file"); } // Internal::read // --------------------------> Implementations for GridMetaData <------------------------------------ @@ -383,24 +384,17 @@ inline GridMetaData::GridMetaData(uint64_t size, Codec c, const NanoGrid { nameKey = stringHash(gridName); nameSize = static_cast(gridName.size() + 1); // include '\0' - const uint32_t* ptr = reinterpret_cast*>(&grid.tree())->mNodeCount; - for (int i = 0; i < 3; ++i) { - MetaData::nodeCount[i] = *ptr++; - } - //MetaData::nodeCount[3] = 1;// one root node - for (int i = 0; i < 3; ++i) { - MetaData::tileCount[i] = *ptr++; - } -} + const uint32_t* ptr = reinterpret_cast(&grid.tree())->mNodeCount; + for (int i = 0; i < 3; ++i) MetaData::nodeCount[i] = *ptr++; + for (int i = 0; i < 3; ++i) MetaData::tileCount[i] = *ptr++; +}// GridMetaData::GridMetaData inline void GridMetaData::write(std::ostream& os) const { os.write(reinterpret_cast(this), sizeof(MetaData)); os.write(gridName.c_str(), nameSize); - if (!os) { - throw std::runtime_error("Failed writing GridMetaData"); - } -} + if (!os) throw std::runtime_error("Failed writing GridMetaData"); +}// GridMetaData::write inline void GridMetaData::read(std::istream& is) { @@ -408,66 +402,68 @@ inline void GridMetaData::read(std::istream& is) std::unique_ptr tmp(new char[nameSize]); is.read(reinterpret_cast(tmp.get()), nameSize); gridName.assign(tmp.get()); - if (!is) 
{ - throw std::runtime_error("Failed reading GridMetaData"); - } -} + if (!is) throw std::runtime_error("Failed reading GridMetaData"); +}// GridMetaData::read // --------------------------> Implementations for Segment <------------------------------------ inline uint64_t Segment::memUsage() const { uint64_t sum = sizeof(Header); - for (auto& m : meta) { - sum += m.memUsage(); - } + for (auto& m : meta) sum += m.memUsage(); return sum; -} +}// Segment::memUsage template inline void Segment::add(const GridHandle& h) { - if (auto* grid = h.template grid()) { // most common - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), 
header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else if (auto* grid = h.template grid()) { - meta.emplace_back(h.size(), header.codec, *grid); - } else { - throw std::runtime_error("nanovdb::io::Segment::add Cannot write grid of unknown type to file"); - } - header.gridCount += 1; -} + for (uint32_t i = 0; i < h.gridCount(); ++i) { + if (auto* grid = h.template grid(i)) { // most common + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if 
(auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else { + std::stringstream ss; + ss << "nanovdb::io::Segment::add: Cannot write grid of unknown type \""<(&header), sizeof(Header))) { throw std::runtime_error("Failed to write Header of Segment"); } - for (auto& m : meta) { - m.write(os); - } -} + for (auto& m : meta) m.write(os); +}// Segment::write inline bool Segment::read(std::istream& is) { @@ -509,7 +503,7 @@ inline bool Segment::read(std::istream& is) m.version = header.version; } return true; -} +}// Segment::read // --------------------------> Implementations for read/write <------------------------------------ @@ -524,22 +518,24 @@ void writeGrid(const std::string& fileName, const GridHandle& handle, C if (verbose) { std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl; } -} +}// writeGrid template void writeGrid(std::ostream& os, const GridHandle& handle, Codec codec) { - Segment s(codec); - s.add(handle); - const uint64_t headerSize = s.memUsage(); + Segment seg(codec); + seg.add(handle); + const uint64_t headerSize = seg.memUsage(); std::streamoff seek = headerSize; - os.seekp(seek, std::ios_base::cur); // skip forward from the current position - s.meta[0].fileSize = Internal::write(os, handle, codec); - seek += s.meta[0].fileSize; + seg.write(os); // write header without the correct fileSize + for (uint32_t i = 0; i < handle.gridCount(); ++i) { + seg.meta[i].fileSize = Internal::write(os, handle, codec, i); + seek += seg.meta[i].fileSize; + } os.seekp(-seek, 
std::ios_base::cur); // rewind to start of stream - s.write(os); // write header + seg.write(os); // rewrite header with the correct fileSize os.seekp(seek - headerSize, std::ios_base::cur); // skip to end -} +}// writeGrid template class VecT> void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec, int verbose) @@ -552,26 +548,13 @@ void writeGrids(const std::string& fileName, const VecT>& ha if (verbose) { std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; } -} +}// writeGrids template class VecT> void writeGrids(std::ostream& os, const VecT>& handles, Codec codec) { - Segment s(codec); - for (auto& h : handles) { - s.add(h); - } - const uint64_t headerSize = s.memUsage(); - std::streamoff seek = headerSize; - os.seekp(seek, std::ios_base::cur); // skip forward from the current position - for (size_t i = 0; i < handles.size(); ++i) { - s.meta[i].fileSize = Internal::write(os, handles[i], codec); - seek += s.meta[i].fileSize; - } - os.seekp(-seek, std::ios_base::cur); // rewind to start of stream - s.write(os); // write header - os.seekp(seek - headerSize, std::ios_base::cur); // skip to end -} + for (auto& h : handles) writeGrid(os, h, codec); +}// writeGrids /// @brief Read the n'th grid template @@ -586,21 +569,21 @@ GridHandle readGrid(const std::string& fileName, uint64_t n, int verbos std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; } return handle; // is converted to r-value and return value is move constructed. 
-} +}// readGrid template -GridHandle readGrid(std::istream& is, uint64_t n, const BufferT& buffer) +GridHandle readGrid(std::istream& is, uint64_t n, const BufferT& pool) { - Segment s; + Segment seg; uint64_t counter = 0; - while (s.read(is)) { + while (seg.read(is)) { std::streamoff seek = 0; - for (auto& m : s.meta) { + for (auto& m : seg.meta) { if (counter == n) { - GridHandle handle(BufferT::create(m.gridSize, &buffer)); + auto buffer = BufferT::create(m.gridSize, &pool); is.seekg(seek, std::ios_base::cur); // skip forward from the current position - Internal::read(is, handle, s.header.codec); - return handle; // is converted to r-value and return value is move constructed. + Internal::read(is, buffer, seg.header.codec); + return GridHandle(std::move(buffer)); } else { seek += m.fileSize; } @@ -609,7 +592,7 @@ GridHandle readGrid(std::istream& is, uint64_t n, const BufferT& buffer is.seekg(seek, std::ios_base::cur); // skip forward from the current position } throw std::runtime_error("Grid index exceeds grid count in file"); -} +}// readGrid /// @brief Read the first grid with a specific name template @@ -628,21 +611,21 @@ GridHandle readGrid(const std::string& fileName, const std::string& gri } } return handle; // is converted to r-value and return value is move constructed. 
-} +}// readGrid template -GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& buffer) +GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) { const auto key = stringHash(gridName); - Segment s; - while (s.read(is)) { + Segment seg; + while (seg.read(is)) { std::streamoff seek = 0; - for (auto& m : s.meta) { + for (auto& m : seg.meta) { if (m.nameKey == key && m.gridName == gridName) { // check for hash key collision - GridHandle handle(BufferT::create(m.gridSize, &buffer)); + auto buffer = BufferT::create(m.gridSize, &pool); is.seekg(seek, std::ios_base::cur); // rewind - Internal::read(is, handle, s.header.codec); - return handle; // is converted to r-value and return value is move constructed. + Internal::read(is, buffer, seg.header.codec); + return GridHandle(std::move(buffer)); } else { seek += m.fileSize; } @@ -650,7 +633,7 @@ GridHandle readGrid(std::istream& is, const std::string& gridName, cons is.seekg(seek, std::ios_base::cur); // skip forward from the current position } return GridHandle(); // empty handle -} +}// readGrid /// @brief Read all the grids template class VecT> @@ -665,22 +648,40 @@ VecT> readGrids(const std::string& fileName, int verbose, co std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; } return handles; // is converted to r-value and return value is move constructed. 
-} +}// readGrids template class VecT> -VecT> readGrids(std::istream& is, const BufferT& buffer) +VecT> readGrids(std::istream& is, const BufferT& pool) { VecT> handles; - Segment seg; + Segment seg; while (seg.read(is)) { - for (auto& m : seg.meta) { - GridHandle handle(BufferT::create(m.gridSize, &buffer)); - Internal::read(is, handle, seg.header.codec); - handles.push_back(std::move(handle)); // force move copy assignment + uint64_t bufferSize = 0; + for (auto& m : seg.meta) bufferSize += m.gridSize; + auto buffer = BufferT::create(bufferSize, &pool); + uint64_t bufferOffset = 0; + for (uint16_t i = 0; i < seg.header.gridCount; ++i) { + Internal::read(is, reinterpret_cast(buffer.data()) + bufferOffset, seg.meta[i].gridSize, seg.header.codec); + + // The following three lines provide backwards compatibility with older files + // that were written using writeGrids. + auto *data = reinterpret_cast(buffer.data() + bufferOffset); + data->mGridIndex = static_cast(i); + data->mGridCount = static_cast(seg.header.gridCount); + + bufferOffset += seg.meta[i].gridSize; } + handles.emplace_back(std::move(buffer)); // force move copy assignment } + + // The following two lines provide backwards compatibility with older files + // that were written using writeGrids. Since we (force) updated the mGridIndex + // and mGridCount above, we need to recompute the checksum as well. + for (auto& handle : handles) + updateChecksum(handle); + return handles; // is converted to r-value and return value is move constructed. -} +}// readGrids inline std::vector readGridMetaData(const std::string& fileName) { @@ -689,12 +690,12 @@ inline std::vector readGridMetaData(const std::string& fileName) throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); } return readGridMetaData(is); // is converted to r-value and return value is move constructed. 
-} +}// readGridMetaData inline std::vector readGridMetaData(std::istream& is) { std::vector meta; - Segment seg; + Segment seg; while (seg.read(is)) { std::streamoff seek = 0; for (auto& m : seg.meta) { @@ -704,7 +705,7 @@ inline std::vector readGridMetaData(std::istream& is) is.seekg(seek, std::ios_base::cur); } return meta; // is converted to r-value and return value is move constructed. -} +}// readGridMetaData inline bool hasGrid(const std::string& fileName, const std::string& gridName) { @@ -713,40 +714,61 @@ inline bool hasGrid(const std::string& fileName, const std::string& gridName) throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); } return hasGrid(is, gridName); -} +}// hasGrid inline bool hasGrid(std::istream& is, const std::string& gridName) { const auto key = stringHash(gridName); - Segment s; - while (s.read(is)) { + Segment seg; + while (seg.read(is)) { std::streamoff seek = 0; - for (auto& m : s.meta) { - if (m.nameKey == key && m.gridName == gridName) { - return true; // check for hash key collision - } + for (auto& m : seg.meta) { + if (m.nameKey == key && m.gridName == gridName) return true; // check for hash key collision seek += m.fileSize; } is.seekg(seek, std::ios_base::cur); } return false; -} +}// hasGrid inline uint64_t stringHash(const char* cstr) { uint64_t hash = 0; - if (!cstr) { - return hash; - } - for (auto* str = reinterpret_cast(cstr); *str; ++str) { - uint64_t overflow = hash >> (64 - 8); - hash *= 67; // Next-ish prime after 26 + 26 + 10 - hash += *str + overflow; + if (cstr) { + for (auto* str = reinterpret_cast(cstr); *str; ++str) { + uint64_t overflow = hash >> (64 - 8); + hash *= 67; // Next-ish prime after 26 + 26 + 10 + hash += *str + overflow; + } } return hash; +}// stringHash + +} // namespace io + +inline std::ostream& +operator<<(std::ostream& os, const CoordBBox& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," 
<< b[1][2] << ")"; + return os; } +inline std::ostream& +operator<<(std::ostream& os, const Coord& ijk) +{ + os << "(" << ijk[0] << "," << ijk[1] << "," << ijk[2] << ")"; + return os; } -} // namespace nanovdb::io + +template +inline std::ostream& +operator<<(std::ostream& os, const Vec3& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << ")"; + return os; +} + +} // namespace nanovdb #endif // NANOVDB_IO_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/IndexGridBuilder.h b/nanovdb/nanovdb/util/IndexGridBuilder.h deleted file mode 100644 index 77b9235664..0000000000 --- a/nanovdb/nanovdb/util/IndexGridBuilder.h +++ /dev/null @@ -1,652 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/*! - \file IndexGridBuilder.h - - \author Ken Museth - - \date July 8, 2022 - - \brief Generates a NanoVDB IndexGrid from any existing NanoVDB grid. - - \note An IndexGrid encodes index offsets to external value arrays -*/ - -#ifndef NANOVDB_INDEXGRIDBUILDER_H_HAS_BEEN_INCLUDED -#define NANOVDB_INDEXGRIDBUILDER_H_HAS_BEEN_INCLUDED - -#include "GridHandle.h" -#include "NodeManager.h" -#include "Range.h" -#include "ForEach.h" - -#include -#include -#include -#include // for stringstream -#include -#include // for memcpy - -namespace nanovdb { - -/// @brief Allows for the construction of NanoVDB grids without any dependency -template -class IndexGridBuilder -{ - using SrcNode0 = NanoLeaf< SrcValueT>; - using SrcNode1 = NanoLower; - using SrcNode2 = NanoUpper; - using SrcData0 = typename SrcNode0::DataType; - using SrcData1 = typename SrcNode1::DataType; - using SrcData2 = typename SrcNode2::DataType; - using SrcRootT = NanoRoot; - using SrcTreeT = NanoTree; - using SrcGridT = NanoGrid; - - using DstNode0 = NanoLeaf< ValueIndex>; - using DstNode1 = NanoLower; - using DstNode2 = NanoUpper; - using DstData0 = NanoLeaf< ValueIndex>::DataType; - using DstData1 = NanoLower::DataType; - using DstData2 = NanoUpper::DataType; - using 
DstRootT = NanoRoot; - using DstTreeT = NanoTree; - using DstGridT = NanoGrid; - - NodeManagerHandle<> mSrcMgrHandle; - NodeManager *mSrcMgr; - std::vector mValIdx2, mValIdx1, mValIdx0;// store id of first value in node - uint8_t* mBufferPtr;// pointer to the beginning of the buffer - uint64_t mBufferOffsets[9];//grid, tree, root, upper, lower, leafs, meta, data, buffer size - uint64_t mValueCount; - const bool mIsSparse, mIncludeStats;// include inactive values and stats - - DstNode0* getLeaf( int i=0) const {return PtrAdd(mBufferPtr, mBufferOffsets[5]) + i;} - DstNode1* getLower(int i=0) const {return PtrAdd(mBufferPtr, mBufferOffsets[4]) + i;} - DstNode2* getUpper(int i=0) const {return PtrAdd(mBufferPtr, mBufferOffsets[3]) + i;} - DstRootT* getRoot() const {return PtrAdd(mBufferPtr, mBufferOffsets[2]);} - DstTreeT* getTree() const {return PtrAdd(mBufferPtr, mBufferOffsets[1]);} - DstGridT* getGrid() const {return PtrAdd(mBufferPtr, mBufferOffsets[0]);} - - // Count the number of values (possibly only active) - void countValues(); - - // Below are private methods use to serialize nodes into NanoVDB - template - GridHandle initHandle(uint32_t channels, const BufferT& buffer); - - void processLeafs(); - - void processLower(); - - void processUpper(); - - void processRoot(); - - void processTree(); - - void processGrid(const std::string& name, uint32_t channels); - - void processChannels(uint32_t channels); - -public: - - /// @brief Constructor based on a source grid - /// - /// @param srcGrid Source grid used to generate the IndexGrid - /// @param includeInactive Include inactive values or only active values - /// @param includeStats Include min/max/avg/std per node or not - /// - /// @note For minimum memory consumption set the two boolean options to false - IndexGridBuilder(const SrcGridT& srcGrid, bool includeInactive = true, bool includeStats = true) - : mSrcMgrHandle(createNodeManager(srcGrid)) - , mSrcMgr(mSrcMgrHandle.template mgr()) - , mValueCount(0) - , 
mIsSparse(!includeInactive) - , mIncludeStats(includeStats) - {} - - /// @brief Return an instance of a GridHandle (invoking move semantics) - template - GridHandle getHandle(const std::string& name = "", uint32_t channels = 0u, const BufferT& buffer = BufferT()); - - /// @brief return the total number of values located in the source grid. - /// - /// @note This is minimum number of elements required for the external array that the IndexGrid - /// points to. - uint64_t getValueCount() const { return mValueCount; } - - /// @brief return a buffer with all the values in the source grid - template - BufferT getValues(uint32_t channels = 1u, const BufferT &buffer = BufferT()); - - /// @brief copy values from the source grid into the provided array and returns number of values copied - uint64_t copyValues(SrcValueT *buffer, size_t maxValueCount = -1); -}; // IndexGridBuilder - -//================================================================================================ - -template -template -GridHandle IndexGridBuilder:: -getHandle(const std::string &name, uint32_t channels, const BufferT &buffer) -{ - this->countValues(); - - auto handle = this->template initHandle(channels, buffer);// initialize the arrays of nodes - - this->processLeafs(); - - this->processLower(); - - this->processUpper(); - - this->processRoot(); - - this->processTree(); - - this->processGrid(name, channels); - - this->processChannels(channels); - - return handle; -} // IndexGridBuilder::getHandle - -//================================================================================================ - -template -void IndexGridBuilder::countValues() -{ - const uint64_t stats = mIncludeStats ? 
4u : 0u; - - uint64_t valueCount = 1u + stats;//background, [minimum, maximum, average, and deviation] - - // root values - if (mIsSparse) { - for (auto it = mSrcMgr->root().beginValueOn(); it; ++it) ++valueCount; - } else { - for (auto it = mSrcMgr->root().beginValue(); it; ++it) ++valueCount; - } - - // tile values in upper internal nodes - mValIdx2.resize(mSrcMgr->nodeCount(2) + 1); - if (mIsSparse) { - forEach(1, mValIdx2.size(), 8, [&](const Range1D& r){ - for (auto i = r.begin(); i!=r.end(); ++i) { - mValIdx2[i] = stats + mSrcMgr->upper(i-1).data()->mValueMask.countOn(); - } - }); - } else { - forEach(1, mValIdx2.size(), 8, [&](const Range1D& r){ - const uint64_t n = 32768u + stats; - for (auto i = r.begin(); i!=r.end(); ++i) { - mValIdx2[i] = n - mSrcMgr->upper(i-1).data()->mChildMask.countOn(); - } - }); - } - mValIdx2[0] = valueCount; - for (size_t i=1; inodeCount(1) + 1); - if (mIsSparse) { - forEach(1, mValIdx1.size(), 8, [&](const Range1D& r){ - for (auto i = r.begin(); i!=r.end(); ++i) { - mValIdx1[i] = stats + mSrcMgr->lower(i-1).data()->mValueMask.countOn(); - } - }); - } else { - forEach(1, mValIdx1.size(), 8, [&](const Range1D& r){ - const uint64_t n = 4096u + stats; - for (auto i = r.begin(); i!=r.end(); ++i) { - mValIdx1[i] = n - mSrcMgr->lower(i-1).data()->mChildMask.countOn(); - } - }); - } - mValIdx1[0] = valueCount; - for (size_t i=1; inodeCount(0) + 1, 512u + stats); - if (mIsSparse) { - forEach(1, mValIdx0.size(), 8, [&](const Range1D& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - mValIdx0[i] = stats + mSrcMgr->leaf(i-1).data()->mValueMask.countOn(); - } - }); - } - mValIdx0[0] = valueCount; - for (size_t i=1; i -uint64_t IndexGridBuilder::copyValues(SrcValueT *buffer, size_t maxValueCount) -{ - assert(mBufferPtr); - if (maxValueCount < mValueCount) return 0; - - // Value array always starts with these entries - buffer[0] = mSrcMgr->root().background(); - if (mIncludeStats) { - buffer[1] = mSrcMgr->root().minimum(); - buffer[2] = 
mSrcMgr->root().maximum(); - buffer[3] = mSrcMgr->root().average(); - buffer[4] = mSrcMgr->root().stdDeviation(); - } - {// copy root tile values - auto *srcData = mSrcMgr->root().data(); - SrcValueT *v = buffer + (mIncludeStats ? 5u : 1u); - for (uint32_t tileID = 0; tileID < srcData->mTableSize; ++tileID) { - auto *srcTile = srcData->tile(tileID); - if (srcTile->isChild() ||(mIsSparse&&!srcTile->state)) continue; - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = srcTile->value; - } - } - - {// upper nodes - auto kernel = [&](const Range1D& r) { - DstData2 *dstData = this->getUpper(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - SrcValueT *v = buffer + mValIdx2[i]; - const SrcNode2 &srcNode = mSrcMgr->upper(i); - if (mIncludeStats) { - *v++ = srcNode.minimum(); - *v++ = srcNode.maximum(); - *v++ = srcNode.average(); - *v++ = srcNode.stdDeviation(); - } - if (mIsSparse) { - for (auto it = srcNode.beginValueOn(); it; ++it) { - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = *it; - } - } else { - auto *srcData = srcNode.data(); - for (uint32_t j = 0; j != 32768; ++j) { - if (srcData->mChildMask.isOn(j)) continue; - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = srcData->getValue(j); - } - } - } - }; - forEach(0, mSrcMgr->nodeCount(2), 1, kernel); - } - - {// lower nodes - auto kernel = [&](const Range1D& r) { - DstData1 *dstData = this->getLower(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstData) { - SrcValueT *v = buffer + mValIdx1[i]; - const SrcNode1 &srcNode = mSrcMgr->lower(i); - if (mIncludeStats) { - *v++ = srcNode.minimum(); - *v++ = srcNode.maximum(); - *v++ = srcNode.average(); - *v++ = srcNode.stdDeviation(); - } - if (mIsSparse) { - for (auto it = srcNode.beginValueOn(); it; ++it) { - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = *it; - } - } else { - auto *srcData = srcNode.data(); - for (uint32_t j = 0; j != 4096; ++j) { - if (srcData->mChildMask.isOn(j)) continue; - 
NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = srcData->getValue(j); - } - } - } - }; - forEach(0, mSrcMgr->nodeCount(1), 4, kernel); - } - {// leaf nodes - auto kernel = [&](const Range1D& r) { - DstData0 *dstLeaf = this->getLeaf(r.begin())->data(); - for (auto i = r.begin(); i != r.end(); ++i, ++dstLeaf) { - SrcValueT *v = buffer + mValIdx0[i];// bug!? - const SrcNode0 &srcLeaf = mSrcMgr->leaf(i); - if (mIncludeStats) { - *v++ = srcLeaf.minimum(); - *v++ = srcLeaf.maximum(); - *v++ = srcLeaf.average(); - *v++ = srcLeaf.stdDeviation(); - } - if (mIsSparse) { - for (auto it = srcLeaf.beginValueOn(); it; ++it) { - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = *it; - } - } else { - const SrcData0 *srcData = srcLeaf.data(); - for (uint32_t j = 0; j != 512; ++j) { - NANOVDB_ASSERT(v - buffer < mValueCount); - *v++ = srcData->getValue(j); - } - } - } - }; - forEach(0, mSrcMgr->nodeCount(0), 8, kernel); - } - return mValueCount; -} // IndexGridBuilder::copyValues - -template -template -BufferT IndexGridBuilder::getValues(uint32_t channels, const BufferT &buffer) -{ - assert(channels > 0); - auto values = BufferT::create(channels*sizeof(SrcValueT)*mValueCount, &buffer); - SrcValueT *p = reinterpret_cast(values.data()); - if (!this->copyValues(p, mValueCount)) { - throw std::runtime_error("getValues: insufficient channels"); - } - for (uint32_t i=1; i -template -GridHandle IndexGridBuilder:: -initHandle(uint32_t channels, const BufferT& buffer) -{ - const SrcTreeT &srcTree = mSrcMgr->tree(); - mBufferOffsets[0] = 0;// grid is always stored at the start of the buffer! 
- mBufferOffsets[1] = DstGridT::memUsage(); // tree - mBufferOffsets[2] = mBufferOffsets[1] + DstTreeT::memUsage(); // root - mBufferOffsets[3] = mBufferOffsets[2] + DstRootT::memUsage(srcTree.root().tileCount());// upper internal nodes - mBufferOffsets[4] = mBufferOffsets[3] + srcTree.nodeCount(2)*sizeof(DstData2); // lower internal nodes - mBufferOffsets[5] = mBufferOffsets[4] + srcTree.nodeCount(1)*sizeof(DstData1); // leaf nodes - mBufferOffsets[6] = mBufferOffsets[5] + srcTree.nodeCount(0)*sizeof(DstData0); // meta data - mBufferOffsets[7] = mBufferOffsets[6] + GridBlindMetaData::memUsage(channels); // channel values - mBufferOffsets[8] = mBufferOffsets[7] + channels*mValueCount*sizeof(SrcValueT);// total size -#if 0 - std::cerr << "grid starts at " << mBufferOffsets[0] <<" byte" << std::endl; - std::cerr << "tree starts at " << mBufferOffsets[1] <<" byte" << std::endl; - std::cerr << "root starts at " << mBufferOffsets[2] <<" byte" << std::endl; - std::cerr << "node starts at " << mBufferOffsets[3] <<" byte" << " #" << srcTree.nodeCount(2) << std::endl; - std::cerr << "node starts at " << mBufferOffsets[4] <<" byte" << " #" << srcTree.nodeCount(1) << std::endl; - std::cerr << "leaf starts at " << mBufferOffsets[5] <<" byte" << " #" << srcTree.nodeCount(0) << std::endl; - std::cerr << "meta starts at " << mBufferOffsets[6] <<" byte" << std::endl; - std::cerr << "data starts at " << mBufferOffsets[7] <<" byte" << std::endl; - std::cerr << "buffer ends at " << mBufferOffsets[8] <<" byte" << std::endl; - std::cerr << "creating buffer of size " << (mBufferOffsets[8]>>20) << "MB" << std::endl; -#endif - GridHandle handle(BufferT::create(mBufferOffsets[8], &buffer)); - mBufferPtr = handle.data(); - - return handle; -} // IndexGridBuilder::initHandle - -//================================================================================================ - -template -void IndexGridBuilder::processGrid(const std::string& name, uint32_t channels) -{ - auto *srcData = 
mSrcMgr->grid().data(); - auto *dstData = this->getGrid()->data(); - - dstData->mMagic = NANOVDB_MAGIC_NUMBER; - dstData->mChecksum = 0u; - dstData->mVersion = Version(); - dstData->mFlags = static_cast(GridFlags::IsBreadthFirst); - dstData->mGridIndex = 0; - dstData->mGridCount = 1; - dstData->mGridSize = mBufferOffsets[8]; - std::memset(dstData->mGridName, '\0', GridData::MaxNameSize);//overwrite mGridName - strncpy(dstData->mGridName, name.c_str(), GridData::MaxNameSize-1); - dstData->mMap = srcData->mMap; - dstData->mWorldBBox = srcData->mWorldBBox; - dstData->mVoxelSize = srcData->mVoxelSize; - dstData->mGridClass = GridClass::IndexGrid; - dstData->mGridType = mapToGridType(); - dstData->mBlindMetadataOffset = mBufferOffsets[6]; - dstData->mBlindMetadataCount = channels; - dstData->mData0 = 0u; - dstData->mData1 = mValueCount;// encode the total number of values being indexed - dstData->mData2 = 0u; - - if (name.length() >= GridData::MaxNameSize) {// currently we don't support long grid names - std::stringstream ss; - ss << "Grid name \"" << name << "\" is more then " << GridData::MaxNameSize << " characters"; - throw std::runtime_error(ss.str()); - } -} // IndexGridBuilder::processGrid - -//================================================================================================ - -template -void IndexGridBuilder::processTree() -{ - auto *srcData = mSrcMgr->tree().data(); - auto *dstData = this->getTree()->data(); - for (int i=0; i<4; ++i) dstData->mNodeOffset[i] = mBufferOffsets[5-i] - mBufferOffsets[1];// byte offset from tree to first leaf, lower, upper and root node - for (int i=0; i<3; ++i) { - dstData->mNodeCount[i] = srcData->mNodeCount[i];// total number of nodes of type: leaf, lower internal, upper internal - dstData->mTileCount[i] = srcData->mTileCount[i];// total number of active tile values at the lower internal, upper internal and root node levels - } - dstData->mVoxelCount = srcData->mVoxelCount;// total number of active voxels in the 
root and all its child nodes -} // IndexGridBuilder::processTree - -//================================================================================================ - -template -void IndexGridBuilder::processRoot() -{ - auto *srcData = mSrcMgr->root().data(); - auto *dstData = this->getRoot()->data(); - - if (dstData->padding()>0) std::memset(dstData, 0, DstRootT::memUsage(mSrcMgr->root().tileCount())); - dstData->mBBox = srcData->mBBox; - dstData->mTableSize = srcData->mTableSize; - dstData->mBackground = 0u; - uint64_t valueCount = 1u;// the first entry is always the background value - if (mIncludeStats) { - valueCount += 4u; - dstData->mMinimum = 1u; - dstData->mMaximum = 2u; - dstData->mAverage = 3u; - dstData->mStdDevi = 4u; - } else if (dstData->padding()==0) { - dstData->mMinimum = 0u; - dstData->mMaximum = 0u; - dstData->mAverage = 0u; - dstData->mStdDevi = 0u; - } - //uint64_t valueCount = 5u;// this is always the first available index - for (uint32_t tileID = 0, childID = 0; tileID < dstData->mTableSize; ++tileID) { - auto *srcTile = srcData->tile(tileID); - auto *dstTile = dstData->tile(tileID); - dstTile->key = srcTile->key; - if (srcTile->isChild()) { - dstTile->child = childID * sizeof(DstNode2) + mBufferOffsets[3] - mBufferOffsets[2]; - dstTile->state = false; - dstTile->value = std::numeric_limits::max(); - ++childID; - } else { - dstTile->child = 0; - dstTile->state = srcTile->state; - if (!(mIsSparse && !dstTile->state)) dstTile->value = valueCount++; - } - } -} // IndexGridBuilder::processRoot - -//================================================================================================ - -template -void IndexGridBuilder::processUpper() -{ - static_assert(DstData2::padding()==0u, "Expected upper internal nodes to have no padding"); - auto kernel = [&](const Range1D& r) { - const bool activeOnly = mIsSparse; - const bool hasStats = mIncludeStats; - auto *dstData1 = this->getLower()->data();// fixed size - auto *dstData2 = 
this->getUpper(r.begin())->data();// fixed size - for (auto i = r.begin(); i != r.end(); ++i, ++dstData2) { - SrcData2 *srcData2 = mSrcMgr->upper(i).data();// might vary in size due to compression - dstData2->mBBox = srcData2->mBBox; - dstData2->mFlags = srcData2->mFlags; - srcData2->mFlags = i;// encode node ID - dstData2->mChildMask = srcData2->mChildMask; - dstData2->mValueMask = srcData2->mValueMask; - uint64_t n = mValIdx2[i]; - if (mIncludeStats) { - dstData2->mMinimum = n++; - dstData2->mMaximum = n++; - dstData2->mAverage = n++; - dstData2->mStdDevi = n++; - } else { - dstData2->mMinimum = 0u; - dstData2->mMaximum = 0u; - dstData2->mAverage = 0u; - dstData2->mStdDevi = 0u; - } - for (uint32_t j = 0; j != 32768; ++j) { - if (dstData2->isChild(j)) { - SrcData1 *srcChild = srcData2->getChild(j)->data(); - DstData1 *dstChild = dstData1 + srcChild->mFlags; - dstData2->setChild(j, dstChild); - srcChild->mFlags = dstChild->mFlags;// restore - } else { - const bool test = activeOnly && !srcData2->mValueMask.isOn(j); - dstData2->setValue(j, test ? 
0 : n++); - } - } - - } - }; - forEach(0, mSrcMgr->nodeCount(2), 1, kernel); -} // IndexGridBuilder::processUpper - -//================================================================================================ - -template -void IndexGridBuilder::processLower() -{ - static_assert(DstData1::padding()==0u, "Expected lower internal nodes to have no padding"); - auto kernel = [&](const Range1D& r) { - const bool activeOnly = mIsSparse; - DstData0 *dstData0 = this->getLeaf()->data();// first dst leaf node - DstData1 *dstData1 = this->getLower(r.begin())->data();// fixed size - for (auto i = r.begin(); i != r.end(); ++i, ++dstData1) { - SrcData1 *srcData1 = mSrcMgr->lower(i).data();// might vary in size due to compression - dstData1->mBBox = srcData1->mBBox; - dstData1->mFlags = srcData1->mFlags; - srcData1->mFlags = i;// encode node ID - dstData1->mChildMask = srcData1->mChildMask; - dstData1->mValueMask = srcData1->mValueMask; - uint64_t n = mValIdx1[i]; - if (mIncludeStats) { - dstData1->mMinimum = n++; - dstData1->mMaximum = n++; - dstData1->mAverage = n++; - dstData1->mStdDevi = n++; - } else { - dstData1->mMinimum = 0u; - dstData1->mMaximum = 0u; - dstData1->mAverage = 0u; - dstData1->mStdDevi = 0u; - } - for (uint32_t j = 0; j != 4096; ++j) { - if (dstData1->isChild(j)) { - SrcData0 *srcChild = srcData1->getChild(j)->data(); - DstData0 *dstChild = dstData0 + srcChild->mBBoxMin[0]; - dstData1->setChild(j, dstChild); - srcChild->mBBoxMin[0] = dstChild->mBBoxMin[0];// restore - } else { - const bool test = activeOnly && !srcData1->mValueMask.isOn(j); - dstData1->setValue(j, test ? 
0 : n++); - } - } - } - }; - forEach(0, mSrcMgr->nodeCount(1), 4, kernel); -} // IndexGridBuilder::processLower - -//================================================================================================ - -template -void IndexGridBuilder::processLeafs() -{ - static_assert(DstData0::padding()==0u, "Expected leaf nodes to have no padding"); - - auto kernel = [&](const Range1D& r) { - DstData0 *dstData0 = this->getLeaf(r.begin())->data();// fixed size - const uint8_t flags = mIsSparse ? 16u : 0u;// 4th bit indicates sparseness - for (auto i = r.begin(); i != r.end(); ++i, ++dstData0) { - SrcData0 *srcData0 = mSrcMgr->leaf(i).data();// might vary in size due to compression - dstData0->mBBoxMin = srcData0->mBBoxMin; - srcData0->mBBoxMin[0] = int(i);// encode node ID - dstData0->mBBoxDif[0] = srcData0->mBBoxDif[0]; - dstData0->mBBoxDif[1] = srcData0->mBBoxDif[1]; - dstData0->mBBoxDif[2] = srcData0->mBBoxDif[2]; - dstData0->mFlags = flags | (srcData0->mFlags & 2u);// 2nd bit indicates a bbox - dstData0->mValueMask = srcData0->mValueMask; - - if (mIncludeStats) { - dstData0->mStatsOff = mValIdx0[i];// first 4 entries are leaf stats - dstData0->mValueOff = mValIdx0[i] + 4u; - } else { - dstData0->mStatsOff = 0u;// set to background which indicates no stats! 
- dstData0->mValueOff = mValIdx0[i]; - } - } - }; - forEach(0, mSrcMgr->nodeCount(0), 8, kernel); -} // IndexGridBuilder::processLeafs - -//================================================================================================ - -template -void IndexGridBuilder::processChannels(uint32_t channels) -{ - for (uint32_t i=0; i(mBufferPtr, mBufferOffsets[6]) + i; - auto *blindData = PtrAdd(mBufferPtr, mBufferOffsets[7]) + i*mValueCount; - metaData->setBlindData(blindData); - metaData->mElementCount = mValueCount; - metaData->mFlags = 0; - metaData->mSemantic = GridBlindDataSemantic::Unknown; - metaData->mDataClass = GridBlindDataClass::ChannelArray; - metaData->mDataType = mapToGridType(); - std::memset(metaData->mName, '\0', GridBlindMetaData::MaxNameSize); - std::stringstream ss; - ss << toStr(metaData->mDataType) << "_channel_" << i; - strncpy(metaData->mName, ss.str().c_str(), GridBlindMetaData::MaxNameSize-1); - if (i) {// deep copy from previous channel -#if 0 - this->copyValues(blindData, mValueCount); - //std::memcpy(blindData, blindData-mValueCount, mValueCount*sizeof(SrcValueT)); -#else - nanovdb::forEach(0,mValueCount,1024,[&](const nanovdb::Range1D &r){ - SrcValueT *dst=blindData+r.begin(), *end=dst+r.size(), *src=dst-mValueCount; - while(dst!=end) *dst++ = *src++; - }); -#endif - } else { - this->copyValues(blindData, mValueCount); - } - } -} - -} // namespace nanovdb - -#endif // NANOVDB_INDEXGRIDBUILDER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/Invoke.h b/nanovdb/nanovdb/util/Invoke.h index 36e58b4fdf..48e1ac0a42 100644 --- a/nanovdb/nanovdb/util/Invoke.h +++ b/nanovdb/nanovdb/util/Invoke.h @@ -19,7 +19,7 @@ #ifndef NANOVDB_INVOKE_H_HAS_BEEN_INCLUDED #define NANOVDB_INVOKE_H_HAS_BEEN_INCLUDED -#include "../NanoVDB.h"// for nanovdb::CoordBBox +#include // for nanovdb::CoordBBox #ifdef NANOVDB_USE_TBB #include diff --git a/nanovdb/nanovdb/util/NanoToOpenVDB.h b/nanovdb/nanovdb/util/NanoToOpenVDB.h index 9ee0297d62..8610afb9a8 100644 --- 
a/nanovdb/nanovdb/util/NanoToOpenVDB.h +++ b/nanovdb/nanovdb/util/NanoToOpenVDB.h @@ -56,7 +56,7 @@ nanoToOpenVDB(const NanoGrid& grid, int verbose = 0); /// @brief Forward declaration of free-standing function that de-serializes a NanoVDB GridHandle into an OpenVDB GridBase template openvdb::GridBase::Ptr -nanoToOpenVDB(const GridHandle& handle, int verbose = 0); +nanoToOpenVDB(const GridHandle& handle, int verbose = 0, uint32_t n = 0); /// @brief This class will serialize an OpenVDB grid into a NanoVDB grid managed by a GridHandle. template @@ -304,35 +304,35 @@ nanoToOpenVDB(const NanoGrid& grid, int verbose) template openvdb::GridBase::Ptr -nanoToOpenVDB(const GridHandle& handle, int verbose) +nanoToOpenVDB(const GridHandle& handle, int verbose, uint32_t n) { - if (auto grid = handle.template grid()) { + if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = 
handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); - } else if (auto grid = handle.template grid()) { + } else if (auto grid = handle.template grid(n)) { return nanovdb::nanoToOpenVDB(*grid, verbose); } else { OPENVDB_THROW(openvdb::RuntimeError, "Unsupported NanoVDB grid type!"); diff --git a/nanovdb/nanovdb/util/NodeManager.h b/nanovdb/nanovdb/util/NodeManager.h index c11bc6fb7c..5c15594da4 100644 --- a/nanovdb/nanovdb/util/NodeManager.h +++ b/nanovdb/nanovdb/util/NodeManager.h @@ -14,7 +14,7 @@ \details The ordering of the sequential access to nodes is always breadth-first! */ -#include "../NanoVDB.h"// for NanoGrid etc +#include // for NanoGrid etc #include "HostBuffer.h"// for HostBuffer #ifndef NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED @@ -44,9 +44,9 @@ NodeManagerHandle createNodeManager(const NanoGrid &grid, struct NodeManagerData {// 48B = 6*8B uint64_t mMagic;// 8B + union {int64_t mPadding; uint8_t mLinear;};// 8B of which 1B is used for a binary flag void *mGrid;// 8B pointer to either host or device grid union {int64_t *mPtr[3], mOff[3];};// 24B, use mOff if mLinear!=0 - uint8_t mLinear, mPadding[7];// 7B padding to 8B boundary }; /// @brief This class serves to manage a raw memory buffer of a NanoVDB NodeManager or LeafManager. 
@@ -243,6 +243,10 @@ class NodeManager : private NodeManagerData /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level __hostdev__ uint64_t nodeCount(int level) const { return this->tree().nodeCount(level); } + __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } + __hostdev__ uint64_t lowerCount() const { return this->tree().nodeCount(1); } + __hostdev__ uint64_t upperCount() const { return this->tree().nodeCount(2); } + /// @brief Return the i'th leaf node with respect to breadth-first ordering template __hostdev__ const NodeT& node(uint32_t i) const { @@ -294,8 +298,10 @@ NodeManagerHandle createNodeManager(const NanoGrid &grid, NANOVDB_ASSERT(isValid(data)); data->mMagic = NANOVDB_MAGIC_NUMBER; data->mGrid = const_cast*>(&grid); + data->mPadding = 0; - if ((data->mLinear = NodeManager::isLinear(grid)?1u:0u)) { + if (NodeManager::isLinear(grid)) { + data->mLinear = uint8_t(1u); data->mOff[0] = PtrDiff(grid.tree().template getFirstNode<0>(), &grid); data->mOff[1] = PtrDiff(grid.tree().template getFirstNode<1>(), &grid); data->mOff[2] = PtrDiff(grid.tree().template getFirstNode<2>(), &grid); @@ -304,7 +310,7 @@ NodeManagerHandle createNodeManager(const NanoGrid &grid, int64_t *ptr1 = data->mPtr[1] = data->mPtr[0] + grid.tree().nodeCount(0); int64_t *ptr2 = data->mPtr[2] = data->mPtr[1] + grid.tree().nodeCount(1); // Performs depth first traversal but breadth first insertion - for (auto it2 = grid.tree().root().beginChild(); it2; ++it2) { + for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { *ptr2++ = PtrDiff(&*it2, &grid); for (auto it1 = it2->beginChild(); it1; ++it1) { *ptr1++ = PtrDiff(&*it1, &grid); diff --git a/nanovdb/nanovdb/util/OpenToNanoVDB.h b/nanovdb/nanovdb/util/OpenToNanoVDB.h index 3dd1c331b2..ea6c2c94d7 100644 --- a/nanovdb/nanovdb/util/OpenToNanoVDB.h +++ b/nanovdb/nanovdb/util/OpenToNanoVDB.h @@ -8,1491 +8,8 @@ \date January 8, 2020 - \brief This class will serialize an OpenVDB grid into a 
NanoVDB grid. -*/ - -#include -#include -#include - -#include "GridHandle.h" // manages and streams the raw memory buffer of a NanoVDB grid. -#include "GridChecksum.h" // for nanovdb::checksum -#include "GridStats.h" // for nanovdb::Extrema -#include "GridBuilder.h" // for nanovdb::AbsDiff -#include "ForEach.h"// for nanovdb::forEach -#include "Reduce.h"// for nanovdb::reduce -#include "Invoke.h"// for nanovdb::invoke -#include "DitherLUT.h"// for nanovdb::DitherLUT - -#include - -#ifndef NANOVDB_OPENTONANOVDB_H_HAS_BEEN_INCLUDED -#define NANOVDB_OPENTONANOVDB_H_HAS_BEEN_INCLUDED - -namespace nanovdb { - -/// @brief Converts OpenVDB types to NanoVDB types, e.g. openvdb::Vec3f to nanovdb::Vec3f -/// Template specializations are defined below. -template -struct OpenToNanoType { using Type = T; }; - -//================================================================================================ - -/// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle -template -GridHandle -openToNanoVDB(const openvdb::GridBase::Ptr& base, - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - int verbose = 0); - -//================================================================================================ - -/// @brief Forward declaration of free-standing function that converts a typed OpenVDB Grid into a NanoVDB GridHandle -/// -/// @details Unlike the function above that takes a base openvdb grid, this method is strongly typed and allows -/// for compression, e.g. 
openToNanoVDB -template::Type> -GridHandle -openToNanoVDB(const openvdb::Grid& grid, - StatsMode sMode = StatsMode::Default, - ChecksumMode cMode = ChecksumMode::Default, - int verbose = 0); - -//================================================================================================ - -/// @brief Template specialization for openvdb::Coord -template<> -struct OpenToNanoType -{ - using Type = nanovdb::Coord; - static_assert(sizeof(Type) == sizeof(openvdb::Coord), "Mismatching sizeof"); -}; - -/// @brief Template specialization for openvdb::CoordBBox -template<> -struct OpenToNanoType -{ - using Type = nanovdb::CoordBBox; - static_assert(sizeof(Type) == sizeof(openvdb::CoordBBox), "Mismatching sizeof"); -}; - -/// @brief Template specialization for openvdb::math::BBox -template -struct OpenToNanoType> -{ - using Type = nanovdb::BBox; - static_assert(sizeof(Type) == sizeof(openvdb::math::BBox), "Mismatching sizeof"); -}; - -/// @brief Template specialization for openvdb::math::Vec3 -template -struct OpenToNanoType> -{ - using Type = nanovdb::Vec3; - static_assert(sizeof(Type) == sizeof(openvdb::math::Vec3), "Mismatching sizeof"); -}; - -/// @brief Template specialization for openvdb::math::Vec4 -template -struct OpenToNanoType> -{ - using Type = nanovdb::Vec4; - static_assert(sizeof(Type) == sizeof(openvdb::math::Vec4), "Mismatching sizeof"); -}; - -/// @brief Template specialization for openvdb::ValueMask -template<> -struct OpenToNanoType -{ - using Type = nanovdb::ValueMask; -}; - -template<> -struct OpenToNanoType -{ - using Type = uint32_t; -}; - -template<> -struct OpenToNanoType -{ - using Type = uint32_t; -}; - -//================================================================================================ - -/// @brief Grid trait that defines OpenVDB grids with the exact same configuration as NanoVDB grids -template -struct OpenGridType -{ - using GridT = openvdb::Grid::Type>; - using TreeT = typename GridT::TreeType; - using RootT = typename 
TreeT::RootNodeType; - using UpperT = typename RootT::ChildNodeType; - using LowerT = typename UpperT::ChildNodeType; - using LeafT = typename LowerT::ChildNodeType; - using ValueT = typename LeafT::ValueType; -}; - -/// @brief Template specialization for the PointIndexGrid -template <> -struct OpenGridType -{ - using GridT = openvdb::tools::PointIndexGrid;// 5, 4, 3 - using TreeT = typename GridT::TreeType; - using RootT = typename TreeT::RootNodeType; - using UpperT = typename RootT::ChildNodeType; - using LowerT = typename UpperT::ChildNodeType; - using LeafT = typename LowerT::ChildNodeType; - using ValueT = typename LeafT::ValueType; -}; - -/// @brief Template specialization for the PointDataGrid -template <> -struct OpenGridType -{ - using GridT = openvdb::points::PointDataGrid;// 5, 4, 3 - using TreeT = typename GridT::TreeType; - using RootT = typename TreeT::RootNodeType; - using UpperT = typename RootT::ChildNodeType; - using LowerT = typename UpperT::ChildNodeType; - using LeafT = typename LowerT::ChildNodeType; - using ValueT = typename LeafT::ValueType; -}; - -//================================================================================================ - -/// @brief This class will convert an OpenVDB grid into a NanoVDB grid managed by a GridHandle. -/// -/// @note Note that this converter assumes a 5,4,3 tree configuration of BOTH the OpenVDB and NanoVDB -/// grids. This is a consequence of the fact that the OpenVDB tree is defined in OpenGridType and -/// that all NanoVDB trees are by design always 5,4,3! -/// -/// @details While NanoVDB allows root, internal and leaf nodes to reside anywhere in the memory buffer -/// this conversion tool uses the following memory layout: -/// -/// -/// Grid | Tree Root... Node2... Node1... Leaf... BlindMetaData... BlindData... -/// where "..." 
means size may vary and "|" means "no gap" - -template -class OpenToNanoVDB -{ - struct BlindMetaData; // forward declerations - template struct NodePair; - struct Codec {float min, max; uint16_t log2, size;};// used for adaptive bit-rate quantization - - using OpenGridT = typename OpenGridType::GridT;// OpenVDB grid - using OpenTreeT = typename OpenGridType::TreeT;// OpenVDB tree - using OpenRootT = typename OpenGridType::RootT;// OpenVDB root node - using OpenUpperT= typename OpenGridType::UpperT;// OpenVDB upper internal node - using OpenLowerT= typename OpenGridType::LowerT;// OpenVDB lower internal node - using OpenLeafT = typename OpenGridType::LeafT;// OpenVDB leaf node - using OpenValueT= typename OpenGridType::ValueT; - - using NanoValueT= typename BuildToValueMap::Type;// e.g. maps from Fp16 to float - using NanoLeafT = NanoLeaf; - using NanoLowerT= NanoLower; - using NanoUpperT= NanoUpper; - using NanoRootT = NanoRoot; - using NanoTreeT = NanoTree; - using NanoGridT = NanoGrid; - - static_assert(sizeof(NanoValueT) == sizeof(OpenValueT), "Mismatching sizeof"); - static_assert(is_same::Type>::value, "Mismatching ValueT"); - - NanoValueT mDelta; // skip node if: node.max < -mDelta || node.min > mDelta - uint8_t* mBufferPtr;// pointer to the beginning of the buffer - uint64_t mBufferOffsets[9];//grid, tree, root, upper. lower, leafs, meta data, blind data, buffer size - int mVerbose; - std::set mBlindMetaData; // sorted according to index - std::vector> mArray0; // leaf nodes - std::vector> mArray1; // lower internal nodes - std::vector> mArray2; // upper internal nodes - std::unique_ptr mCodec;// defines a codec per leaf node - StatsMode mStats; - ChecksumMode mChecksum; - bool mDitherOn; - OracleT mOracle;// used for adaptive bit-rate quantization - -public: - /// @brief Default c-tor - OpenToNanoVDB(); - - /// @brief return a reference to the compression oracle - /// - /// @note Note, the oracle is only used when NanoBuildT = nanovdb::FpN! 
- OracleT& oracle() { return mOracle; } - - void setVerbose(int mode = 1) { mVerbose = mode; } - - void enableDithering(bool on = true) { mDitherOn = on; } - - void setStats(StatsMode mode = StatsMode::Default) { mStats = mode; } - - void setChecksum(ChecksumMode mode = ChecksumMode::Default) { mChecksum = mode; } - - /// @brief Return a shared pointer to a NanoVDB grid handle constructed from the specified OpenVDB grid - GridHandle operator()(const OpenGridT& grid, - const BufferT& allocator = BufferT()); - - GridHandle operator()(const OpenGridT& grid, - StatsMode sMode, - ChecksumMode cMode, - int verbose, - const BufferT& allocator = BufferT()); - -private: - - /// @brief Allocates and return a handle for the buffer - GridHandle initHandle(const OpenGridT& openGrid, const BufferT& allocator); - - template - inline typename std::enable_if::value>::type - compression(const OpenGridT&, uint64_t&) {}// no-op - - template - inline typename std::enable_if::value>::type - compression(const OpenGridT& openGrid, uint64_t &offset); - - /// @brief Private method to process the grid - NanoGridT* processGrid(const OpenGridT& openGrid); - - // @brief Private method to process the tree - NanoTreeT* processTree(const OpenTreeT& openTree); - - /// @brief Private method to process the root node - NanoRootT* processRoot(const OpenRootT& openRoot); - - template - void processNodes(std::vector> &nodes); - - ////////////////////// - - template - typename std::enable_if::LeafT, typename T::OpenNodeT>::value && - !std::is_same::LeafT, typename T::OpenNodeT>::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value>::type - processLeafs(std::vector &leafs); - - template - typename std::enable_if::value || - std::is_same::value || - std::is_same::value>::type - processLeafs(std::vector &leafs); - - template - typename std::enable_if::value>::type - processLeafs(std::vector &leafs); - - template - typename 
std::enable_if::LeafT>::value>::type - processLeafs(std::vector> &leafs); - - template - typename std::enable_if::LeafT>::value>::type - processLeafs(std::vector> &leafs); - - ////////////////////// - - /// @brief Private methods to pre-process the bind metadata - template - typename std::enable_if::value && - !std::is_same::value>::type - preProcessMetadata(const T& openGrid); - - template - typename std::enable_if::value>::type - preProcessMetadata(const T& openGrid); - - template - typename std::enable_if::value>::type - preProcessMetadata(const T& openGrid); - - ////////////////////// - - /// @brief Private methods to process the blind metadata - template - typename std::enable_if::value && - !std::is_same::value, GridBlindMetaData*>::type - processMetadata(const T& openGrid); - - template - typename std::enable_if::value, GridBlindMetaData*>::type - processMetadata(const T& openGrid); - - template - typename std::enable_if::value, GridBlindMetaData*>::type - processMetadata(const T& openGrid); - - ////////////////////// - - uint64_t pointCount(); - - template - void copyPointAttribute(size_t attIdx, AttT *attPtr); - - /// @brief Performs: nanoNode.origin = openNode.origin - /// openNode.origin = nanoNode offset - template - void encode(const OpenNodeT *openNode, NanoNodeT *nanoNode); - - /// @brief Performs: nanoNode offset = openNode.origin - /// openNode.origin = nanoNode.origin - /// return nanoNode offset - template - typename NanoNode::Type* decode(const OpenNodeT *openNode); - -}; // OpenToNanoVDB class - -//================================================================================================ - -template -OpenToNanoVDB::OpenToNanoVDB() - : mVerbose(0) - , mStats(StatsMode::Default) - , mChecksum(ChecksumMode::Default) - , mDitherOn(false) - , mOracle() -{ -} - -//================================================================================================ - -template -inline GridHandle -OpenToNanoVDB:: - operator()(const OpenGridT& 
openGrid, - StatsMode sMode, - ChecksumMode cMode, - int verbose, - const BufferT& allocator) -{ - this->setStats(sMode); - this->setChecksum(cMode); - this->setVerbose(verbose); - return (*this)(openGrid, allocator); -} - -//================================================================================================ - -template -inline GridHandle -OpenToNanoVDB:: - operator()(const OpenGridT& openGrid, - const BufferT& allocator) -{ - //mVerbose = 2; - std::unique_ptr timer(mVerbose > 1 ? new openvdb::util::CpuTimer() : nullptr); - - if (timer) timer->start("Allocating memory for the NanoVDB buffer"); - auto handle = this->initHandle(openGrid, allocator); - if (timer) timer->stop(); - - if (timer) timer->start("Processing leaf nodes"); - this->processLeafs(mArray0); - if (timer) timer->stop(); - - if (timer) timer->start("Processing lower internal nodes"); - this->processNodes(mArray1); - if (timer) timer->stop(); - - if (timer) timer->start("Processing upper internal nodes"); - this->processNodes(mArray2); - if (timer) timer->stop(); - - if (timer) timer->start("Processing grid, tree and root node"); - NanoGridT *nanoGrid = this->processGrid(openGrid); - if (timer) timer->stop(); - - // Point grids already make use of min/max so they shouldn't be re-computed - if (std::is_same::value || - std::is_same::value) { - if (mStats > StatsMode::BBox) mStats = StatsMode::BBox; - } - - if (timer) timer->start("GridStats"); - gridStats(*nanoGrid, mStats); - if (timer) timer->stop(); - - if (timer) timer->start("Checksum"); - updateChecksum(*nanoGrid, mChecksum); - if (timer) timer->stop(); - - return handle; // invokes move constructor -} // OpenToNanoVDB::operator() - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -OpenToNanoVDB:: - compression(const OpenGridT& openGrid, uint64_t &offset) -{ - static_assert(is_same::value, "compression: expected 
OpenBuildT == float"); - static_assert(is_same::value, "compression: expected NanoBuildT == FpN"); - if (is_same::value && mOracle.getTolerance() < 0.0f) {// default tolerance for level set and fog volumes - if (openGrid.getGridClass() == openvdb::GRID_LEVEL_SET) { - mOracle.setTolerance(0.1f * float(openGrid.voxelSize()[0]));// range of ls: [-3dx; 3dx] - } else if (openGrid.getGridClass() == openvdb::GRID_FOG_VOLUME) { - mOracle.setTolerance(0.01f);// range of FOG volumes: [0;1] - } else { - mOracle.setTolerance(0.0f); - } - } - - const size_t size = mArray0.size(); - mCodec.reset(new Codec[size]); - - DitherLUT lut(mDitherOn); - auto kernel = [&](const auto &r) { - const OracleT oracle = mOracle;// local copy since it's very lightweight - for (auto i=r.begin(); i!=r.end(); ++i) { - const float *data = mArray0[i].node->buffer().data(); - float min = std::numeric_limits::max(), max = -min; - for (int j=0; j<512; ++j) { - float v = data[j]; - if (vmax) max=v; - } - mCodec[i].min = min; - mCodec[i].max = max; - const float range = max - min; - uint16_t logBitWidth = 0;// 0,1,2,3,4 => 1,2,4,8,16 bits - while (range > 0.0f && logBitWidth < 4u) { - const uint32_t mask = (uint32_t(1) << (uint32_t(1) << logBitWidth)) - 1u; - const float encode = mask/range; - const float decode = range/mask; - int j = 0; - do { - const float exact = data[j];// exact value - const uint32_t code = uint32_t(encode*(exact - min) + lut(j)); - const float approx = code * decode + min;// approximate value - j += oracle(exact, approx) ? 
1 : 513; - } while(j < 512); - if (j == 512) break; - ++logBitWidth; - } - mCodec[i].log2 = logBitWidth; - mCodec[i].size = NanoLeafT::DataType::memUsage(1u< -GridHandle OpenToNanoVDB:: - initHandle(const OpenGridT& openGrid, const BufferT& buffer) -{ - auto &openTree = openGrid.tree(); - auto &openRoot = openTree.root(); - - mArray0.clear(); - mArray1.clear(); - mArray2.clear(); - std::vector nodeCount = openTree.nodeCount(); - mArray0.reserve(nodeCount[0]); - mArray1.reserve(nodeCount[1]); - mArray2.reserve(nodeCount[2]); - - uint64_t offset[3] = {0}; - for (auto it2 = openRoot.cbeginChildOn(); it2; ++it2) { - mArray2.emplace_back(&(*it2), offset[2]); - offset[2] += NanoUpperT::memUsage(); - for (auto it1 = it2->cbeginChildOn(); it1; ++it1) { - mArray1.emplace_back(&(*it1), offset[1]); - offset[1] += NanoLowerT::memUsage(); - for (auto it0 = it1->cbeginChildOn(); it0; ++it0) { - mArray0.emplace_back(&(*it0), offset[0]); - offset[0] += sizeof(NanoLeafT); - } - } - } - - this->template compression(openGrid, offset[0]); + \warning this file has been replaced by CreateNanoGrid.h - this->preProcessMetadata(openGrid); - - mBufferOffsets[0] = 0;// grid is always placed at the beginning of the buffer! 
- mBufferOffsets[1] = NanoGridT::memUsage(); // grid ends and tree begins - mBufferOffsets[2] = NanoTreeT::memUsage(); // tree ends and root begins - mBufferOffsets[3] = NanoRootT::memUsage(openTree.root().getTableSize()); // root ends and upper internal nodes begins - mBufferOffsets[4] = offset[2];// upper ends and lower internal nodes - mBufferOffsets[5] = offset[1];// lower ends and leaf nodes begins - mBufferOffsets[6] = offset[0];// leafs end blind meta data begins - mBufferOffsets[7] = GridBlindMetaData::memUsage(mBlindMetaData.size()); // meta ends and blind data begins - mBufferOffsets[8] = 0;// blind data - for (auto& i : mBlindMetaData) mBufferOffsets[8] += i.size; // blind data - - // Compute the prefixed sum - for (int i = 2; i < 9; ++i) { - mBufferOffsets[i] += mBufferOffsets[i - 1]; - } - -#if 0 - std::cerr << "grid starts at " << mBufferOffsets[0] <<" byte" << std::endl; - std::cerr << "tree starts at " << mBufferOffsets[1] <<" byte" << std::endl; - std::cerr << "root starts at " << mBufferOffsets[2] <<" byte" << std::endl; - std::cerr << "node starts at " << mBufferOffsets[3] <<" byte" << " #" << mArray2.size() << std::endl; - std::cerr << "node starts at " << mBufferOffsets[4] <<" byte" << " #" << mArray1.size() << std::endl; - std::cerr << "leaf starts at " << mBufferOffsets[5] <<" byte" << " #" << mArray0.size() << std::endl; - std::cerr << "meta starts at " << mBufferOffsets[6] <<" byte" << std::endl; - std::cerr << "data starts at " << mBufferOffsets[7] <<" byte" << std::endl; - std::cerr << "buffer ends at " << mBufferOffsets[8] <<" byte" << std::endl; - std::cerr << "creating buffer of size " << (mBufferOffsets[8]>>20) << "MB" << std::endl; -#endif - - GridHandle handle(BufferT::create(mBufferOffsets[8], &buffer)); - mBufferPtr = handle.data(); - - //openvdb::util::CpuTimer timer("zero buffer"); -#if 1 - //std::memset(mBufferPtr, '8', mBufferOffsets[8]); -#else - forEach(0,mBufferOffsets[8],1024*1024,[&](const Range1D &r){ - //for (uint64_t 
*p = reinterpret_cast(mBufferPtr)+r.begin(), *q=p+r.size(); p!=q; ++p) *p=0; - std::memset(mBufferPtr+r.begin(), '8', r.size()); - }); - //uint8_t *begin = (mBufferPtr >> 3) << 3; - //std::memset((mBufferPtr >> 3) << 3, 0, mBufferPtr - p); - //forEach(0,mBufferOffsets[8],10*1024*1024,[&](const Range1D &r){std::memset(mBufferPtr+r.begin(), 0, r.size());}); -#endif - //timer.stop(); - - if (mVerbose) { - openvdb::util::printBytes(std::cout, mBufferOffsets[8], "Allocated", " for the NanoVDB grid\n"); - } - return handle;// is converted to r-value so return value is move constructed! -}// OpenToNanoVDB::initHandle - -//================================================================================================ - -template -NanoGrid* OpenToNanoVDB:: - processGrid(const OpenGridT& openGrid) -{ - auto *nanoGrid = reinterpret_cast(mBufferPtr + mBufferOffsets[0]); - if (!openGrid.transform().baseMap()->isLinear()) { - OPENVDB_THROW(openvdb::ValueError, "processGrid: OpenToNanoVDB only supports grids with affine transforms"); - } - auto affineMap = openGrid.transform().baseMap()->getAffineMap(); - const std::string gridName = openGrid.getName(); - auto *data = nanoGrid->data(); - - data->mMagic = NANOVDB_MAGIC_NUMBER;//8B - data->mChecksum = 0u;// 8B - data->mVersion = Version();//4B - data->mFlags = static_cast(GridFlags::IsBreadthFirst);//4B - data->mGridIndex = 0;//4B - data->mGridCount = 1;//4B - data->mGridSize = mBufferOffsets[8];//8B - std::memset(data->mGridName, '\0', GridData::MaxNameSize);// 256B overwrite mGridName - strncpy(data->mGridName, gridName.c_str(), GridData::MaxNameSize-1); - data->mWorldBBox = BBox(); - data->mBlindMetadataOffset = 0; - data->mBlindMetadataCount = 0; - - if (gridName.length() >= GridData::MaxNameSize) { - data->setLongGridNameOn();// grid name is long so store it as blind data - } - mDelta = NanoValueT(0); // dummy value - switch (openGrid.getGridClass()) { // set grid class - case openvdb::GRID_LEVEL_SET: - if 
(!is_floating_point::value) - OPENVDB_THROW(openvdb::ValueError, "processGrid: Level sets are expected to be floating point types"); - data->mGridClass = GridClass::LevelSet; - mDelta = NanoValueT(openGrid.voxelSize()[0]); // skip a node if max < -mDelta || min > mDelta - break; - case openvdb::GRID_FOG_VOLUME: - data->mGridClass = GridClass::FogVolume; - break; - case openvdb::GRID_STAGGERED: - data->mGridClass = GridClass::Staggered; - break; - default: - data->mGridClass = GridClass::Unknown; - } - - // mapping from the OpenVDB build type to the NanoVDB build type and GridType enum - if (std::is_same::value) { // resolved at compile time - data->mGridType = GridType::Float; - } else if (std::is_same::value) { - data->mGridType = GridType::Double; - } else if (std::is_same::value) { - data->mGridType = GridType::Int16; - } else if (std::is_same::value) { - data->mGridType = GridType::Int32; - } else if (std::is_same::value) { - data->mGridType = GridType::Int64; - } else if (std::is_same::value) { - data->mGridType = GridType::Vec3f; - } else if (std::is_same::value) { - data->mGridType = GridType::UInt32; - } else if (std::is_same::value) { - data->mGridType = GridType::UInt32; - data->mGridClass = GridClass::PointIndex; - } else if (std::is_same::value) { - data->mGridType = GridType::UInt32; - data->mGridClass = GridClass::PointData; - } else if (std::is_same::value) { - data->mGridType = GridType::Mask; - data->mGridClass = GridClass::Topology; - } else if (std::is_same::value) { - data->mGridType = GridType::Boolean; - } else if (std::is_same::value) { - data->mGridType = GridType::Fp4; - } else if (std::is_same::value) { - data->mGridType = GridType::Fp8; - } else if (std::is_same::value) { - data->mGridType = GridType::Fp16; - } else if (std::is_same::value) { - data->mGridType = GridType::FpN; - } else if (std::is_same::value) { - data->mGridType = GridType::Vec4f; - } else if (std::is_same::value) { - data->mGridType = GridType::Vec4d; - } else { - 
OPENVDB_THROW(openvdb::ValueError, "processGrid: Unsupported value type"); - } - { // set affine map - if (openGrid.hasUniformVoxels()) { - data->mVoxelSize = nanovdb::Vec3R(affineMap->voxelSize()[0]); - } else { - data->mVoxelSize = affineMap->voxelSize(); - } - const auto mat = affineMap->getMat4(); - // Only support non-tapered at the moment: - data->mMap.set(mat, mat.inverse(), 1.0); - } - data->mData0 = 0u; - data->mData1 = 0u; - data->mData2 = 0u; - - this->processTree(openGrid.tree());// calls processRoot - - if (auto size = mBlindMetaData.size()) { - auto *metaData = this->processMetadata(openGrid); - data->mBlindMetadataOffset = PtrDiff(metaData, nanoGrid); - data->mBlindMetadataCount = static_cast(size); - auto *blindData = reinterpret_cast(mBufferPtr + mBufferOffsets[7]); - metaData->setBlindData(blindData); - } - - return nanoGrid; -}// OpenToNanoVDB::processGrid - -//================================================================================================ - -template -NanoTree* OpenToNanoVDB:: - processTree(const OpenTreeT& openTree) -{ - auto *nanoTree = reinterpret_cast(mBufferPtr + mBufferOffsets[1]); - auto *data = nanoTree->data(); - - data->setRoot( this->processRoot( openTree.root()) ); - - NanoUpperT *nanoUpper = mArray2.empty() ? nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[3]); - data->setFirstNode(nanoUpper); - - NanoLowerT *nanoLower = mArray1.empty() ? nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[4]); - data->setFirstNode(nanoLower); - - NanoLeafT *nanoLeaf = mArray0.empty() ? 
nullptr : reinterpret_cast(mBufferPtr + mBufferOffsets[5]); - data->setFirstNode(nanoLeaf); - - data->mNodeCount[0] = static_cast(mArray0.size()); - data->mNodeCount[1] = static_cast(mArray1.size()); - data->mNodeCount[2] = static_cast(mArray2.size()); - -#if 1// count active tiles and voxels - - // Count number of active tiles in the lower internal nodes - data->mTileCount[0] = reduce(mArray1, uint32_t(0), [&](auto &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray1[i].node->getValueMask().countOn(); - return sum;}, std::plus()); - - // Count number of active tiles in the upper internal nodes - data->mTileCount[1] = reduce(mArray2, uint32_t(0), [&](auto &r, uint32_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray2[i].node->getValueMask().countOn(); - return sum;}, std::plus()); - - // Count number of active tile in the root node - uint32_t sum = 0; - for (auto it = openTree.root().cbeginValueOn(); it; ++it) ++sum; - data->mTileCount[2] = sum; - - data->mVoxelCount = reduce(mArray0, uint64_t(0), [&](auto &r, uint64_t sum){ - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray0[i].node->valueMask().countOn(); - return sum;}, std::plus()); - - data->mVoxelCount += data->mTileCount[0]*NanoLeafT::NUM_VALUES; - data->mVoxelCount += data->mTileCount[1]*NanoLowerT::NUM_VALUES; - data->mVoxelCount += data->mTileCount[2]*NanoUpperT::NUM_VALUES; - -#else - - data->mTileCount[0] = 0; - data->mTileCount[1] = 0; - data->mTileCount[2] = 0; - data->mVoxelCount = 0; - -#endif - - return nanoTree; -}// OpenToNanoVDB::processTree - -//================================================================================================ - -template -NanoRoot* OpenToNanoVDB:: - processRoot(const OpenRootT& openRoot) -{ - auto *nanoRoot = reinterpret_cast(mBufferPtr + mBufferOffsets[2]); - auto* data = nanoRoot->data(); - if (data->padding()>0) { - //std::cout << "Root has padding\n"; - std::memset(data, 0, 
NanoRootT::memUsage(openRoot.getTableSize())); - } else { - data->mTableSize = 0;// incremented below - } - data->mBackground = openRoot.background(); - data->mMinimum = data->mMaximum = data->mBackground; - data->mBBox.min() = openvdb::Coord::max(); // set to an empty bounding box - data->mBBox.max() = openvdb::Coord::min(); - - OpenValueT value = openvdb::zeroVal();// to avoid compiler warning - for (auto iter = openRoot.cbeginChildAll(); iter; ++iter) { - auto* tile = data->tile(data->mTableSize++); - if (const OpenUpperT *openChild = iter.probeChild( value )) { - tile->setChild(iter.getCoord(), this->decode(openChild), data); - } else { - tile->setValue(iter.getCoord(), iter.isValueOn(), value); - } - } - return nanoRoot; -} // OpenToNanoVDB::processRoot - -//================================================================================================ - -template -template -void OpenToNanoVDB:: - processNodes(std::vector>& openNodes) -{ - using NanoNodeT = typename NanoNode::Type; - //if (NanoNodeT::DataType::padding()>0u) std::cerr << "OpenToNanoVDB: internal node has padding\n"; - static_assert(NanoNodeT::LEVEL == 1 || NanoNodeT::LEVEL == 2, "Expected internal node"); - auto kernel = [&](const Range1D& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5 - NanoNodeT::LEVEL];// 3 or 4 - OpenValueT value = openvdb::zeroVal();// to avoid compiler warning - for (auto i = r.begin(); i != r.end(); ++i) { - auto *openNode = openNodes[i].node; - auto *nanoNode = PtrAdd(ptr, openNodes[i].offset); - auto* data = nanoNode->data(); - if (NanoNodeT::DataType::padding()>0u) std::memset(data, 0, NanoNodeT::DataType::memUsage()); - this->encode(openNode, nanoNode);// sets data->mBBoxMin - data->mValueMask = openNode->getValueMask(); // copy value mask - data->mChildMask = openNode->getChildMask(); // copy child mask - for (auto iter = openNode->cbeginChildAll(); iter; ++iter) { - if (const auto *openChild = iter.probeChild(value)) { - data->setChild(iter.pos(), 
this->decode(openChild)); - } else { - data->setValue(iter.pos(), value); - } - } - } - }; - forEach(openNodes, 1, kernel); -} // OpenToNanoVDB::processNodes - -//================================================================================================ - -template -template -inline typename std::enable_if::LeafT, typename T::OpenNodeT>::value && - !std::is_same::LeafT, typename T::OpenNodeT>::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value && - !std::is_same::value>::type -OpenToNanoVDB::processLeafs(std::vector& openLeafs) -{ - //if (NanoLeafT::DataType::padding()>0u) std::cerr << "OpenToNanoVDB: leaf has padding\n"; - auto kernel = [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *openLeaf = openLeafs[i].node; - auto *nanoLeaf = PtrAdd(ptr, openLeafs[i].offset); - auto* data = nanoLeaf->data(); - if (NanoLeafT::DataType::padding()>0u) {// resolved at compile time - std::memset(data, 0, NanoLeafT::DataType::memUsage()); - } else { - data->mFlags = data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mMaximum = data->mMinimum = typename NanoLeafT::DataType::ValueType(0); - data->mStdDevi = data->mAverage = typename NanoLeafT::DataType::FloatType(0); - } - this->encode(openLeaf, nanoLeaf);// sets data->mBBoxMin - data->mValueMask = openLeaf->valueMask(); // copy value mask - auto *src = reinterpret_cast(openLeaf->buffer().data()); - for (NanoValueT *dst = data->mValues, *end = dst + OpenLeafT::size(); dst != end; dst += 4, src += 4) { - dst[0] = src[0]; // copy *all* voxel values in sets of four, i.e. 
loop-unrolling - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - } - }; - forEach(openLeafs, 8, kernel); -} // OpenToNanoVDB::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::value || - std::is_same::value || - std::is_same::value>::type -OpenToNanoVDB::processLeafs(std::vector& openLeafs) -{ - static_assert(NanoLeafT::DataType::padding()==0u, "Expected no padding in LeafNode"); - using ArrayT = typename NanoLeafT::DataType::ArrayType; - using FloatT = typename std::conditional=16, double, float>::type;// 16 compression and higher requires double - DitherLUT lut(mDitherOn); - - auto kernel = [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *openLeaf = openLeafs[i].node; - auto *nanoLeaf = PtrAdd(ptr, openLeafs[i].offset); - auto* data = nanoLeaf->data(); - data->mFlags = data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mDev = data->mAvg = data->mMax = data->mMin = 0u; - this->encode(openLeaf, nanoLeaf);// sets data->mBBoxMin - data->mValueMask = openLeaf->valueMask(); // copy value mask - auto *src = reinterpret_cast(openLeaf->buffer().data()); - // compute extrema values - float min = std::numeric_limits::max(), max = -min; - for (int i=0; i<512; ++i) { - const float v = src[i]; - if (v < min) min = v; - if (v > max) max = v; - } - data->init(min, max, NanoLeafT::DataType::bitWidth());// sets mMinimum and mQuantum - // perform quantization relative to the values in the current leaf node - const FloatT encode = FloatT((1 << NanoLeafT::DataType::bitWidth()) - 1)/(max-min); - auto *code = reinterpret_cast(data->mCode); - int offset = 0; - if (std::is_same::value) {// resolved at compile-time - for (int i=0; i<128; ++i) { - auto tmp = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + 
lut(offset++)) << 4 | tmp; - tmp = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)) << 4 | tmp; - } - } else { - for (int i=0; i<128; ++i) { - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - *code++ = ArrayT(encode * (*src++ - min) + lut(offset++)); - } - } - } - }; - forEach(openLeafs, 8, kernel); -} // OpenToNanoVDB::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -OpenToNanoVDB::processLeafs(std::vector& openLeafs) -{ - static_assert(is_same::value, "Expected OpenBuildT == float"); - static_assert(NanoLeafT::DataType::padding()==0u, "Expected no padding in LeafNode"); - DitherLUT lut(mDitherOn); - auto kernel = [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - const uint8_t logBitWidth = uint8_t(mCodec[i].log2); - auto *openLeaf = openLeafs[i].node; - auto *nanoLeaf = PtrAdd(ptr, openLeafs[i].offset); - auto* data = nanoLeaf->data(); - data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mDev = data->mAvg = data->mMax = data->mMin = 0u; - this->encode(openLeaf, nanoLeaf);// sets data->mBBoxMin - data->mFlags = logBitWidth << 5;// pack logBitWidth into 3 MSB of mFlag - data->mValueMask = openLeaf->valueMask(); // copy value mask - auto *src = reinterpret_cast(openLeaf->buffer().data()); - const float min = mCodec[i].min, max = mCodec[i].max; - data->init(min, max, uint8_t(1) << logBitWidth);// sets mMinimum and mQuantum - // perform quantization relative to the values in the current leaf node - int offset = 0; - switch (logBitWidth) { - case 0u: {// 1 bit - auto *dst = reinterpret_cast(data+1); - const float encode = 1.0f/(max - min); - for (int j=0; 
j<64; ++j) { - uint8_t a = 0; - for (int k=0; k<8; ++k) { - a |= uint8_t(encode * (*src++ - min) + lut(offset++)) << k; - } - *dst++ = a; - } - } - break; - case 1u: {// 2 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 3.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = uint8_t(encode * (*src++ - min) + lut(offset++)); - a |= uint8_t(encode * (*src++ - min) + lut(offset++)) << 2; - a |= uint8_t(encode * (*src++ - min) + lut(offset++)) << 4; - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 6 | a; - } - } - break; - case 2u: {// 4 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 15.0f/(max - min); - for (int j=0; j<128; ++j) { - auto a = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 4 | a; - a = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)) << 4 | a; - } - } - break; - case 3u: {// 8 bits - auto *dst = reinterpret_cast(data+1); - const float encode = 255.0f/(max - min); - for (int j=0; j<128; ++j) { - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint8_t(encode * (*src++ - min) + lut(offset++)); - } - } - break; - default: {// 16 bits - auto *dst = reinterpret_cast(data+1); - const double encode = 65535.0/(max - min);// note that double is required! 
- for (int j=0; j<128; ++j) { - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - *dst++ = uint16_t(encode * (*src++ - min) + lut(offset++)); - } - } - }// end switch - } - };// kernel - forEach(openLeafs, 8, kernel); -} // OpenToNanoVDB::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::LeafT>::value>::type -OpenToNanoVDB::processLeafs(std::vector>& openLeafs) -{ - static_assert(NanoLeafT::DataType::padding()==0u, "Expected no padding in LeafNode"); - auto kernel = [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *openLeaf = openLeafs[i].node; - auto *nanoLeaf = PtrAdd(ptr, openLeafs[i].offset); - this->encode(openLeaf, nanoLeaf);// sets data->mBBoxMin - auto* data = nanoLeaf->data(); - data->mFlags = data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mValueMask = openLeaf->valueMask(); // copy value mask - data->mValues = *reinterpret_cast*>(openLeaf->buffer().data()); // copy values - data->mPadding[1] = data->mPadding[0] = 0u; - } - }; - forEach(openLeafs, 8, kernel); -} // OpenToNanoVDB::processLeafs - -//================================================================================================ - -template -template -inline typename std::enable_if::LeafT>::value>::type -OpenToNanoVDB::processLeafs(std::vector>& openLeafs) -{ - static_assert(NanoLeafT::DataType::padding()==0u, "Expected no padding in LeafNode"); - auto kernel = [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto *openLeaf = openLeafs[i].node; - auto *nanoLeaf = PtrAdd(ptr, openLeafs[i].offset); - this->encode(openLeaf, nanoLeaf); - auto* data = nanoLeaf->data(); - 
data->mFlags = data->mBBoxDif[2] = data->mBBoxDif[1] = data->mBBoxDif[0] = 0u; - data->mValueMask = openLeaf->valueMask(); // copy value mask - data->mPadding[1] = data->mPadding[0] = 0u; - } - }; - forEach(openLeafs, 8, kernel); -} // OpenToNanoVDB::processLeafs - -//================================================================================================ - -template -uint64_t OpenToNanoVDB::pointCount() -{ - return reduce(mArray0, uint64_t(0), [&](auto &r, uint64_t sum) { - for (auto i=r.begin(); i!=r.end(); ++i) sum += mArray0[i].node->getLastValue(); - return sum;}, std::plus()); -}// OpenToNanoVDB::pointCount - -//================================================================================================ - -/// @brief Performs: nanoNode.origin = openNode.origin -/// openNode.origin = nanoNode offset -template -template -inline void OpenToNanoVDB:: -encode(const OpenNodeT *openNode, NanoNodeT *nanoNode) -{ - static_assert(is_same::Type>::value, "Type mismatch"); - openvdb::Coord &ijk = const_cast(openNode->origin()); - nanoNode->data()->setOrigin(ijk); - reinterpret_cast(ijk) = PtrDiff(nanoNode, mBufferPtr); -}// OpenToNanoVDB::encode - -//================================================================================================ - -/// @brief Performs: nanoNode offset = openNode.origin -/// openNode.origin = nanoNode.origin -/// return nanoNode offset -template -template -inline typename NanoNode::Type* OpenToNanoVDB:: -decode(const OpenNodeT *openNode) -{ - using NanoNodeT = typename NanoNode::Type; - openvdb::Coord &ijk = const_cast(openNode->origin()); - NanoNodeT *nanoNode = PtrAdd(mBufferPtr, reinterpret_cast(ijk)); - Coord tmp = nanoNode->origin(); - ijk[0] = tmp[0]; - ijk[1] = tmp[1]; - ijk[2] = tmp[2]; - return nanoNode; -}// OpenToNanoVDB::decode - -//================================================================================================ - -template -template -struct OpenToNanoVDB::NodePair { - using OpenNodeT = NodeT; - 
using NanoNodeT = typename NanoNode::Type; - NodePair(const NodeT *ptr, size_t n) : node(ptr), offset(n) {} - const NodeT *node;// pointer to OpenVDB node - uint64_t offset;// byte offset to matching NanoVDB node, relative to the first -};// OpenToNanoVDB::NodePair - -//================================================================================================ - -template -struct OpenToNanoVDB::BlindMetaData -{ - BlindMetaData(const std::string& n, const std::string& t, size_t i, size_t c, size_t s) - : name(n) - , typeName(t) - , index(i) - , count(c) - , size(AlignUp(c * s)) - { - } - const std::string name, typeName; - const size_t index, count, size; - bool operator<(const BlindMetaData& other) const { return index < other.index; } // required by std::set -}; // OpenToNanoVDB::BlindMetaData - -//================================================================================================ - -template -template -inline typename std::enable_if::value && - !std::is_same::value>::type -OpenToNanoVDB::preProcessMetadata(const T& openGrid) -{ - mBlindMetaData.clear(); - const size_t length = openGrid.getName().length(); - if (length >= GridData::MaxNameSize) { - mBlindMetaData.emplace("grid name", "uint8_t", 0, 1, length + 1);// Null-terminated byte strings - } -}// OpenToNanoVDB::preProcessMetadata - -//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -OpenToNanoVDB::preProcessMetadata(const T& openGrid) -{ - mBlindMetaData.clear(); - if (const uint64_t pointCount = this->pointCount()) { - mBlindMetaData.emplace("index", "uint32_t", 0, pointCount, sizeof(uint32_t)); - } - const size_t length = openGrid.getName().length(); - if (length >= GridData::MaxNameSize) { - mBlindMetaData.emplace("grid name", "uint8_t", mBlindMetaData.size(), 1, length + 1);// Null-terminated byte strings - } -}// OpenToNanoVDB::preProcessMetadata - 
-//================================================================================================ - -template -template -inline typename std::enable_if::value>::type -OpenToNanoVDB::preProcessMetadata(const T& openGrid) -{ - mBlindMetaData.clear(); - size_t counter = 0; - if (const uint64_t pointCount = this->pointCount()) { - auto *openLeaf = openGrid.tree().cbeginLeaf().getLeaf(); - const auto& attributeSet = openLeaf->attributeSet(); - const auto& descriptor = attributeSet.descriptor(); - const auto& nameMap = descriptor.map(); - for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { - const size_t index = it->second; - auto& attArray = openLeaf->constAttributeArray(index); - mBlindMetaData.emplace(it->first, descriptor.valueType(index), index, pointCount, attArray.valueTypeSize()); - } - counter += nameMap.size(); - } - const size_t length = openGrid.getName().length(); - if (length >= GridData::MaxNameSize) { - mBlindMetaData.emplace("grid name", "uint8_t", counter, 1, length + 1);// Null-terminated byte strings - } -}// OpenToNanoVDB::preProcessMetadata - -//================================================================================================ - -template -template -inline typename std::enable_if::value && - !std::is_same::value,GridBlindMetaData*>::type -OpenToNanoVDB:: - processMetadata(const T& openGrid) -{ - if (mBlindMetaData.empty()) { - return nullptr; - } - assert(mBlindMetaData.size() == 1);// only the grid name is expected - auto it = mBlindMetaData.cbegin(); - assert(it->name == "grid name" && it->typeName == "uint8_t" && it->index == 0); - assert(openGrid.getName().length() >= GridData::MaxNameSize); - auto *metaData = reinterpret_cast(mBufferPtr + mBufferOffsets[6]); - auto *blindData = reinterpret_cast(mBufferPtr + mBufferOffsets[7]); - // write the blind meta data - metaData->setBlindData(blindData); - metaData->mElementCount = it->count; - metaData->mFlags = 0; - metaData->mSemantic = GridBlindDataSemantic::Unknown; - 
metaData->mDataClass = GridBlindDataClass::GridName; - metaData->mDataType = GridType::Unknown; - // write the actual bind data - strcpy(blindData, openGrid.getName().c_str()); - return metaData; -}// OpenToNanoVDB::processMetadata - -//================================================================================================ - -template -template -inline typename std::enable_if::value,GridBlindMetaData*>::type -OpenToNanoVDB::processMetadata(const T& openGrid) -{ - if (mBlindMetaData.empty()) { - return nullptr; - } - assert(mBlindMetaData.size() == 1 || mBlindMetaData.size() == 2);// point index and maybe long grid name - auto *metaData = reinterpret_cast(mBufferPtr + mBufferOffsets[6]); - auto *blindData = reinterpret_cast(mBufferPtr + mBufferOffsets[7]); - - auto it = mBlindMetaData.cbegin(); - const uint32_t leafCount = static_cast(mArray0.size()); - - using LeafDataT = typename NanoLeafT::DataType; - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - - auto *data0 = reinterpret_cast(ptr + mArray0[0].offset); - data0->mMinimum = 0; // start of prefix sum - data0->mMaximum = data0->mValues[NanoLeafT::SIZE - 1u]; - for (uint32_t i = 1; i < leafCount; ++i) { - auto *data1 = reinterpret_cast(ptr + mArray0[i].offset); - data1->mMinimum = data0->mMinimum + data0->mMaximum; - data1->mMaximum = data1->mValues[NanoLeafT::SIZE - 1u]; - data0 = data1; - } - - // write blind meta data for the point offsets - assert(it->count == data0->mMinimum + data0->mMaximum); - assert(it->name == "index" && it->typeName == "uint32_t" && it->index == 0); - metaData[0].setBlindData( blindData ); - metaData[0].mElementCount = it->count; - metaData[0].mFlags = 0; - metaData[0].mSemantic = GridBlindDataSemantic::Unknown; - metaData[0].mDataClass = GridBlindDataClass::IndexArray; - metaData[0].mDataType = GridType::UInt32; - if (it->name.length() >= GridBlindMetaData::MaxNameSize) { - std::stringstream ss; - ss << "Point attribute name \"" << it->name << "\" is more than " << 
(GridBlindMetaData::MaxNameSize-1) << " characters"; - OPENVDB_THROW(openvdb::ValueError, ss.str()); - } - std::memset(metaData[0].mName, '\0', GridBlindMetaData::MaxNameSize);//overwrite mName - memcpy(metaData[0].mName, it->name.c_str(), it->name.size() + 1); - - // write point offsets as blind data - forEach(mArray0, 16, [&](const auto& r) { - for (auto i = r.begin(); i != r.end(); ++i) { - auto *data = reinterpret_cast(ptr + mArray0[i].offset); - uint32_t* p = reinterpret_cast(blindData) + data->mMinimum; - for (uint32_t idx : mArray0[i].node->indices()) *p++ = idx; - } - }); - blindData += it->size;// add point offsets - - // write long grid name if it exists - ++it; - if (it != mBlindMetaData.end()) { - assert(it->name == "grid name" && it->typeName == "uint8_t" && it->index == 1); - assert(openGrid.getName().length() >= GridData::MaxNameSize); - metaData[1].setBlindData( blindData ); - metaData[1].mElementCount = it->count; - metaData[1].mFlags = 0; - metaData[1].mSemantic = GridBlindDataSemantic::Unknown; - metaData[1].mDataClass = GridBlindDataClass::GridName; - metaData[1].mDataType = GridType::Unknown; - strcpy(blindData, openGrid.getName().c_str()); - } - return metaData; -}// OpenToNanoVDB::processMetadata - -//================================================================================================ - -template -template -inline typename std::enable_if::value,GridBlindMetaData*>::type -OpenToNanoVDB::processMetadata(const T& openGrid) -{ - if (mBlindMetaData.empty()) { - return nullptr; - } - - auto *metaData = reinterpret_cast(mBufferPtr + mBufferOffsets[6]); - auto *blindData = reinterpret_cast(mBufferPtr + mBufferOffsets[7]); - - const uint32_t leafCount = static_cast(mArray0.size()); - - using LeafDataT = typename NanoLeafT::DataType; - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - - auto *data0 = reinterpret_cast(ptr + mArray0[0].offset); - data0->mMinimum = 0; // start of prefix sum - data0->mMaximum = data0->mValues[NanoLeafT::SIZE - 
1u]; - for (uint32_t i = 1; i < leafCount; ++i) { - auto *data1 = reinterpret_cast(ptr + mArray0[i].offset); - data1->mMinimum = data0->mMinimum + data0->mMaximum; - data1->mMaximum = data1->mValues[NanoLeafT::SIZE - 1u]; - data0 = data1; - } - - size_t i=0; - for (auto it = mBlindMetaData.cbegin(); it != mBlindMetaData.end(); ++it, ++i) { - metaData[i].setBlindData( blindData ); - metaData[i].mElementCount = it->count; - metaData[i].mFlags = 0; - if (it->name == "grid name") { - metaData[i].mSemantic = GridBlindDataSemantic::Unknown; - metaData[i].mDataClass = GridBlindDataClass::GridName; - metaData[i].mDataType = GridType::Unknown; - assert(openGrid.getName().length() >= GridData::MaxNameSize); - strcpy((char*)blindData, openGrid.getName().c_str()); - } else { - assert(it->count == data0->mMinimum + data0->mMaximum); - metaData[i].mDataClass = GridBlindDataClass::AttributeArray; - if (it->name.length()>= GridBlindMetaData::MaxNameSize) { - std::stringstream ss; - ss << "Point attribute name \"" << it->name << "\" is more than " << (GridBlindMetaData::MaxNameSize-1) << " characters"; - OPENVDB_THROW(openvdb::ValueError, ss.str()); - } - - std::memset(metaData[i].mName, '\0', GridBlindMetaData::MaxNameSize);//overwrite mName - memcpy(metaData[i].mName, it->name.c_str(), it->name.size() + 1); - if (it->typeName == "vec3s") { - metaData[i].mDataType = GridType::Vec3f; - this->copyPointAttribute(it->index, (openvdb::Vec3f*)blindData); - if (it->name == "P") { - metaData[i].mSemantic = GridBlindDataSemantic::PointPosition; - } else if (it->name == "V") { - metaData[i].mSemantic = GridBlindDataSemantic::PointVelocity; - } else if (it->name == "Cd") { - metaData[i].mSemantic = GridBlindDataSemantic::PointColor; - } else if (it->name == "N") { - metaData[i].mSemantic = GridBlindDataSemantic::PointNormal; - } else { - metaData[i].mSemantic = GridBlindDataSemantic::Unknown; - } - } else if (it->typeName == "int32") { - metaData[i].mDataType = GridType::Int32; - 
this->copyPointAttribute(it->index, (int32_t*)blindData); - if (it->name == "id") { - metaData[i].mSemantic = GridBlindDataSemantic::PointId; - } else { - metaData[i].mSemantic = GridBlindDataSemantic::Unknown; - } - } else if (it->typeName == "int64") { - metaData[i].mDataType = GridType::Int64; - this->copyPointAttribute(it->index, (int64_t*)blindData); - if (it->name == "id") { - metaData[i].mSemantic = GridBlindDataSemantic::PointId; - } else { - metaData[i].mSemantic = GridBlindDataSemantic::Unknown; - } - } else if (it->typeName == "float") { - metaData[i].mDataType = GridType::Float; - metaData[i].mSemantic = GridBlindDataSemantic::Unknown; - this->copyPointAttribute(it->index, (float*)blindData); - } else { - std::stringstream ss; - ss << "Unsupported point attribute type: \"" << it->typeName << "\""; - OPENVDB_THROW(openvdb::ValueError, ss.str()); - } - } - blindData += it->size; - } // loop over bind data - return metaData; -}// OpenToNanoVDB::processMetadata - -//================================================================================================ - - -template -template -inline void OpenToNanoVDB:: - copyPointAttribute(size_t attIdx, AttT *attPtr) -{ - static_assert(std::is_same::value, "Expected value to openvdb::PointData"); - using LeafDataT = typename NanoLeafT::DataType; - using HandleT = openvdb::points::AttributeHandle; - forEach(mArray0, 16, [&](const auto& r) { - uint8_t* ptr = mBufferPtr + mBufferOffsets[5]; - for (auto i = r.begin(); i != r.end(); ++i) { - auto* openLeaf = mArray0[i].node; - auto *nanoData = reinterpret_cast(ptr + mArray0[i].offset); - HandleT handle(openLeaf->constAttributeArray(attIdx)); - AttT* p = attPtr + nanoData->mMinimum; - for (auto iter = openLeaf->beginIndexOn(); iter; ++iter) { - *p++ = handle.get(*iter); - } - } - }); -}// OpenToNanoVDB::copyPointAttribute - -//================================================================================================ - -template -GridHandle -openToNanoVDB(const 
openvdb::Grid& grid, - StatsMode sMode, - ChecksumMode cMode, - int verbose) -{ - using OpenBuildT = typename OpenTreeT::BuildType; - OpenToNanoVDB s; - return s(grid, sMode, cMode, verbose); -}// openToNanoVDB - -//================================================================================================ - -template -GridHandle -openToNanoVDB(const openvdb::GridBase::Ptr& base, - StatsMode sMode, - ChecksumMode cMode, - int verbose) -{ - // We need to define these types because they are not defined in OpenVDB - using openvdb_Vec4fTree = typename openvdb::tree::Tree4::Type; - using openvdb_Vec4dTree = typename openvdb::tree::Tree4::Type; - using openvdb_Vec4fGrid = openvdb::Grid; - using openvdb_Vec4dGrid = openvdb::Grid; - - if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid>(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = 
openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else if (auto grid = openvdb::GridBase::grid(base)) { - return openToNanoVDB(*grid, sMode, cMode, verbose); - } else { - OPENVDB_THROW(openvdb::RuntimeError, "Unrecognized OpenVDB grid type"); - } -}// openToNanoVDB - -} // namespace nanovdb +*/ -#endif // NANOVDB_OPENTONANOVDB_H_HAS_BEEN_INCLUDED +#include "CreateNanoGrid.h" \ No newline at end of file diff --git a/nanovdb/nanovdb/util/PrefixSum.h b/nanovdb/nanovdb/util/PrefixSum.h new file mode 100644 index 0000000000..b08ee11d43 --- /dev/null +++ b/nanovdb/nanovdb/util/PrefixSum.h @@ -0,0 +1,79 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file PrefixSum.h + + \author Ken Museth + + \date March 12, 2023 + + \brief Multi-threaded implementations of inclusive prefix sum + + \note An exclusive prefix sum is simply an array starting with zero + followed by the elements in the inclusive prefix sum, minus its + last entry which is the sum of all the input elements. 
+*/ + +#ifndef NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED +#define NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED + +#include "Range.h"// for Range1D +#include +#include // for std::plus + +#ifdef NANOVDB_USE_TBB +#include +#endif + +namespace nanovdb { + +/// @brief Computes inclusive prefix sum of a vector +/// @tparam T Type of the elements in the input/out vector +/// @tparam OpT Type of operation performed on each element (defaults to sum) +/// @param vec input and output vector +/// @param threaded if true multi-threading is used +/// @note Inclusive prefix sum: for (i=1; i> +T prefixSum(std::vector &vec, bool threaded = true, OpT op = OpT()); + +/// @brief An inclusive scan includes in[i] when computing out[i] +/// @note Inclusive prefix operation: for (i=1; i +void inclusiveScan(T *array, size_t size, const T &identity, bool threaded, Op op) +{ +#ifndef NANOVDB_USE_TBB + threaded = false; + (void)identity;// avoids compiler warning +#endif + + if (threaded) { +#ifdef NANOVDB_USE_TBB + using RangeT = tbb::blocked_range; + tbb::parallel_scan(RangeT(0, size), identity, + [&](const RangeT &r, T sum, bool is_final_scan)->T { + T tmp = sum; + for (size_t i = r.begin(); i < r.end(); ++i) { + tmp = op(tmp, array[i]); + if (is_final_scan) array[i] = tmp; + } + return tmp; + },[&](const T &a, const T &b) {return op(a, b);} + ); +#endif + } else { // serial inclusive prefix operation + for (size_t i=1; i +T prefixSum(std::vector &vec, bool threaded, OpT op) +{ + inclusiveScan(vec.data(), vec.size(), T(0), threaded, op); + return vec.back();// sum of all input elements +}// prefixSum + +}// namespace nanovdb + +#endif // NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/Primitives.h b/nanovdb/nanovdb/util/Primitives.h index 892554978f..7c1f3a5856 100644 --- a/nanovdb/nanovdb/util/Primitives.h +++ b/nanovdb/nanovdb/util/Primitives.h @@ -16,7 +16,11 @@ #ifndef NANOVDB_PRIMITIVES_H_HAS_BEEN_INCLUDED #define NANOVDB_PRIMITIVES_H_HAS_BEEN_INCLUDED -#include 
"GridBuilder.h" +#define NANOVDB_PARALLEL_PRIMITIVES + +#include +#include "CreateNanoGrid.h" +#include namespace nanovdb { @@ -34,22 +38,47 @@ namespace nanovdb { /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createLevelSetSphere(ValueT radius = 100, - const Vec3& center = Vec3(0), +/// argument is only used when BuildT is set to FpN. +template +typename enable_if::value || + is_same::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0), const std::string& name = "sphere_ls", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value || + is_same::value || + is_same::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0), + const std::string& name = "sphere_ls", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + bool ditherOn = false, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createLevelSetSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0), + const std::string& name = "sphere_ls_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = 
ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -70,21 +99,30 @@ createLevelSetSphere(ValueT radius = 100, /// @param sMode Mode of computation for the statistics. /// @param cMode Mode of computation for the checksum. /// @param tolerance Global error tolerance use when VoxelT = FpN -/// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} +/// @param ditherOn If true dithering will be applied when BuildT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createFogVolumeSphere(ValueT radius = 100.0f, - const Vec3& center = Vec3(0.0f), - double voxelSize = 1.0f, - double halfWidth = 3.0f, +/// argument is only used when BuildT is set to FpN. +template +typename disable_if::value, GridHandle>::type +createFogVolumeSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "sphere_fog", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createFogVolumeSphere(double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "sphere_fog", StatsMode sMode = StatsMode::Default, @@ -107,14 +145,13 @@ createFogVolumeSphere(ValueT radius = 100.0f, /// @param mode Mode of computation for the checksum. 
/// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -template -inline GridHandle +/// @details The @c BuildT template parameter must be float (default) or double. +template +typename disable_if::value, GridHandle>::type createPointSphere(int pointsPerVoxel = 1, - ValueT radius = 100.0f, - const Vec3& center = Vec3(0.0f), - double voxelSize = 1.0f, + double radius = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "sphere_points", ChecksumMode mode = ChecksumMode::Default, @@ -137,17 +174,27 @@ createPointSphere(int pointsPerVoxel = 1, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createLevelSetTorus(ValueT majorRadius = 100.0f, - ValueT minorRadius = 50.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_ls", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), @@ -178,23 +225,33 @@ createLevelSetTorus(ValueT majorRadius = 100.0f, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createFogVolumeTorus(ValueT majorRadius = 100.0f, - ValueT minorRadius = 50.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, - const Vec3d& origin = Vec3d(0), + const Vec3d& origin = Vec3d(0.0), const std::string& name = "torus_fog", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius = 100.0, + double minorRadius = 50.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "torus_fog_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -214,14 +271,13 @@ createFogVolumeTorus(ValueT majorRadius = 100.0f, /// @param cMode Mode of computation for the checksum. /// @param buffer Buffer used for memory allocation by the handle // -/// @details The @c ValueT template parameter must be float (default) or double. -template -inline GridHandle +/// @details The @c BuildT template parameter must be float (default) or double. 
+template +typename disable_if::value, GridHandle>::type createPointTorus(int pointsPerVoxel = 1, // half-width of narrow band in voxel units - ValueT majorRadius = 100.0f, // major radius of torus in world units - ValueT minorRadius = 50.0f, // minor radius of torus in world units - const Vec3& center = Vec3(0.0f), //center of torus in world units + double majorRadius = 100.0, // major radius of torus in world units + double minorRadius = 50.0, // minor radius of torus in world units + const Vec3d& center = Vec3d(0.0), // center of torus in world units double voxelSize = 1.0, // size of a voxel in world units const Vec3d& origin = Vec3d(0.0f), // origin of grid in world units const std::string& name = "torus_points", // name of grid @@ -246,24 +302,35 @@ createPointTorus(int pointsPerVoxel = 1, // half-width of narrow /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -//// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createLevelSetBox(ValueT width = 40.0f, - ValueT height = 60.0f, - ValueT depth = 100.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createLevelSetBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "box_ls", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createLevelSetBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_ls_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -289,24 +356,35 @@ createLevelSetBox(ValueT width = 40.0f, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createFogVolumeBox(ValueT width = 40.0f, - ValueT height = 60.0f, - ValueT depth = 100.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createFogVolumeBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "box_fog", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createFogVolumeBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "box_fog_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -327,22 +405,31 @@ createFogVolumeBox(ValueT width = 40.0f, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createLevelSetOctahedron(ValueT scale = 100.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "octadedron_ls", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_ls_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -366,22 +453,31 @@ createLevelSetOctahedron(ValueT scale = 100.0f, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createFogVolumeOctahedron(ValueT scale = 100.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "octadedron_fog", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale = 100.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "octadedron_fog_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -405,25 +501,37 @@ createFogVolumeOctahedron(ValueT scale = 100.0f, /// @param ditherOn If true dithering will be applied when VoxelT = {Fp4,Fp8,Fp16,FpN} /// @param buffer Buffer used for memory allocation by the handle /// -/// @details The @c ValueT template parameter must be float (default) or double. -/// The @c VoxelT template parameter must be one of the following: +/// @details The @c BuildT template parameter must be one of the following: /// float (default), double, Fp4, Fp8, Fp16 or FpN. The @c tolerance -/// argument is only used when VoxelT is set to FpN. -template -GridHandle -createLevelSetBBox(ValueT width = 40.0f, - ValueT height = 60.0f, - ValueT depth = 100.0f, - ValueT thickness = 10.0f, - const Vec3& center = Vec3(0.0f), +/// argument is only used when BuildT is set to FpN. 
+template +typename disable_if::value, GridHandle>::type +createLevelSetBBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + double thickness = 10.0, + const Vec3d& center = Vec3d(0.0), double voxelSize = 1.0, double halfWidth = 3.0, const Vec3d& origin = Vec3d(0.0), const std::string& name = "bbox_ls", StatsMode sMode = StatsMode::Default, ChecksumMode cMode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); + +template +typename enable_if::value, GridHandle>::type +createLevelSetBBox(double width = 40.0, + double height = 60.0, + double depth = 100.0, + double thickness = 10.0, + const Vec3d& center = Vec3d(0.0), + double voxelSize = 1.0, + double halfWidth = 3.0, + const Vec3d& origin = Vec3d(0.0), + const std::string& name = "bbox_ls_FpN", + StatsMode sMode = StatsMode::Default, + ChecksumMode cMode = ChecksumMode::Default, float tolerance = -1.0f, bool ditherOn = false, const BufferT& buffer = BufferT()); @@ -444,14 +552,13 @@ createLevelSetBBox(ValueT width = 40.0f, /// @param name Name of the grid /// @param mode Mode of computation for the checksum. 
/// @param buffer Buffer used for memory allocation by the handle -template -inline GridHandle +template +typename disable_if::value, GridHandle>::type createPointBox(int pointsPerVoxel = 1, // half-width of narrow band in voxel units - ValueT width = 40.0f, // width of box in world units - ValueT height = 60.0f, // height of box in world units - ValueT depth = 100.0f, // depth of box in world units - const Vec3& center = Vec3(0.0f), //center of box in world units + double width = 40.0, // width of box in world units + double height = 60.0, // height of box in world units + double depth = 100.0, // depth of box in world units + const Vec3d& center = Vec3d(0.0), // center of box in world units double voxelSize = 1.0, // size of a voxel in world units const Vec3d& origin = Vec3d(0.0), // origin of grid in world units const std::string& name = "box_points", // name of grid @@ -461,44 +568,45 @@ createPointBox(int pointsPerVoxel = 1, // half-width of narrow b //================================================================================================ /// @brief Given an input NanoVDB voxel grid this methods returns a GridHandle to another NanoVDB -/// PointDataGrid with points scattered in the active leaf voxels of in input grid. +/// PointDataGrid with points scattered in the active leaf voxels of in input grid. Note, the +/// coordinates of the points are encoded as blind data in world-space. /// /// @param srcGrid Const input grid used to determine the active voxels to scatter points into /// @param pointsPerVoxel Number of point per voxel on on the surface /// @param name Name of the grid /// @param mode Mode of computation for the checksum. 
/// @param buffer Buffer used for memory allocation by the handle -template +template inline GridHandle -createPointScatter(const NanoGrid& srcGrid, // origin of grid in world units - int pointsPerVoxel = 1, // half-width of narrow band in voxel units - const std::string& name = "point_scatter", // name of grid - ChecksumMode mode = ChecksumMode::Default, - const BufferT& buffer = BufferT()); +createPointScatter(const NanoGrid& srcGrid, // source grid used to scatter points into + int pointsPerVoxel = 1, // half-width of narrow band in voxel units + const std::string& name = "point_scatter", // name of grid + ChecksumMode mode = ChecksumMode::Default, + const BufferT& buffer = BufferT()); //================================================================================================ namespace { -/// @brief Returns a shared pointer to a GridBuilder with narrow-band SDF values for a sphere +/// @brief Returns a shared pointer to a build::Grid containing a narrow-band SDF values for a sphere /// /// @brief Note, this is not (yet) a valid level set SDF field since values inside sphere (and outside -/// the narrow band) are still undefined. Call GridBuilder::sdfToLevelSet() to set those -/// values or alternatively call GridBuilder::sdfToFog to generate a FOG volume. +/// the narrow band) are still undefined. Call builder::sdfToLevelSet() to set those +/// values or alternatively call builder::levelSetToFog to generate a FOG volume. /// -/// @details The @c VoxelT template parameter must be one of the following: -/// float (default), Fp4, Fp8, Fp16 or FpN. -template -std::shared_ptr> -initSphere(ValueT radius, // radius of sphere in world units - const Vec3& center, //center of sphere in world units +/// @details The @c BuildT template parameter must be one of the following: +/// float (default), double, Fp4, Fp8, Fp16 or FpN. 
+template +std::shared_ptr> +initSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin) // origin of grid in world units { + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; static_assert(is_floating_point::value, "initSphere: expect floating point"); - static_assert(is_floating_point::Type>::value, "initSphere: expect floating point"); if (!(radius > 0)) throw std::runtime_error("Sphere: radius must be positive!"); if (!(voxelSize > 0)) @@ -506,16 +614,14 @@ initSphere(ValueT radius, // radius of sphere in world units if (!(halfWidth > 0)) throw std::runtime_error("Sphere: halfWidth must be positive!"); - auto builder = std::make_shared>(ValueT(halfWidth * voxelSize)); - auto acc = builder->getAccessor(); + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); // Define radius of sphere with narrow-band in voxel units const ValueT r0 = radius / ValueT(voxelSize), rmax = r0 + ValueT(halfWidth); // Radius below the Nyquist frequency - if (r0 < ValueT(1.5f)) { - return builder; - } + if (r0 < ValueT(1.5f)) return grid; // Define center of sphere in voxel units const Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), @@ -527,40 +633,50 @@ initSphere(ValueT radius, // radius of sphere in world units const int jmin = Floor(c[1] - rmax), jmax = Ceil(c[1] + rmax); const int kmin = Floor(c[2] - rmax), kmax = Ceil(c[2] + rmax); - Coord ijk; - int & i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; - // Compute signed distances to sphere using leapfrogging in k - for (i = imin; i <= imax; ++i) { - const auto x2 = Pow2(ValueT(i) - c[0]); - for (j = jmin; j <= jmax; ++j) { - const auto x2y2 = Pow2(ValueT(j) - c[1]) + x2; - for (k = kmin; k <= kmax; k += m) { - m = 1; - const auto v = Sqrt(x2y2 + Pow2(ValueT(k) 
- c[2])) - r0; // Distance in voxel units - const auto d = v < 0 ? -v : v; - if (d < halfWidth) { // inside narrow band - acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - - return builder; + const Range<1,int> range(imin, imax+1, 32); + + auto kernel = [&](const Range<1,int> &r) { + auto acc = grid->getWriteAccessor(); + Coord ijk; + int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; + // Compute signed distances to sphere using leapfrogging in k + for (i = r.begin(); i < r.end(); ++i) { + const auto x2 = Pow2(ValueT(i) - c[0]); + for (j = jmin; j <= jmax; ++j) { + const auto x2y2 = Pow2(ValueT(j) - c[1]) + x2; + for (k = kmin; k <= kmax; k += m) { + m = 1; + const auto v = Sqrt(x2y2 + Pow2(ValueT(k) - c[2])) - r0; // Distance in voxel units + const auto d = v < 0 ? -v : v; + if (d < halfWidth) { // inside narrow band + acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + forEach(range, kernel); +#else + kernel(range); +#endif + return grid; } // initSphere -template -std::shared_ptr> -initTorus(ValueT radius1, // major radius of torus in world units - ValueT radius2, // minor radius of torus in world units - const Vec3& center, //center of torus in world units +template +std::shared_ptr> +initTorus(double radius1, // major radius of torus in world units + double radius2, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin) // origin of grid in world units { + using GridT = build::Grid; + using ValueT = 
typename BuildToValueMap::type; static_assert(is_floating_point::value, "initTorus: expect floating point"); - static_assert(is_floating_point::Type>::value, "initTorus: expect floating point"); if (!(radius2 > 0)) throw std::runtime_error("Torus: radius2 must be positive!"); if (!(radius1 > radius2)) @@ -570,15 +686,14 @@ initTorus(ValueT radius1, // major radius of torus in world units if (!(halfWidth > 0)) throw std::runtime_error("Torus: halfWidth must be positive!"); - auto builder = std::make_shared>(ValueT(halfWidth * voxelSize)); - auto acc = builder->getAccessor(); + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); // Define size of torus with narrow-band in voxel units const ValueT r1 = radius1 / ValueT(voxelSize), r2 = radius2 / ValueT(voxelSize), rmax1 = r1 + r2 + ValueT(halfWidth), rmax2 = r2 + ValueT(halfWidth); // Radius below the Nyquist frequency - if (r2 < ValueT(1.5)) - return builder; + if (r2 < ValueT(1.5)) return grid; // Define center of torus in voxel units const Vec3 c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), @@ -590,41 +705,52 @@ initTorus(ValueT radius1, // major radius of torus in world units const int jmin = Floor(c[1] - rmax2), jmax = Ceil(c[1] + rmax2); const int kmin = Floor(c[2] - rmax1), kmax = Ceil(c[2] + rmax1); - Coord ijk; - int & i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; - // Compute signed distances to torus using leapfrogging in k - for (i = imin; i <= imax; ++i) { - const auto x2 = Pow2(ValueT(i) - c[0]); - for (k = kmin; k <= kmax; ++k) { - const auto x2z2 = Pow2(Sqrt(Pow2(ValueT(k) - c[2]) + x2) - r1); - for (j = jmin; j <= jmax; j += m) { - m = 1; - const auto v = Sqrt(x2z2 + Pow2(ValueT(j) - c[1])) - r2; // Distance in voxel units - const auto d = v < 0 ? 
-v : v; - if (d < halfWidth) { // inside narrow band - acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - - return builder; + const Range<1,int> range(imin, imax+1, 32); + auto kernel = [&](const Range<1,int> &r) { + auto acc = grid->getWriteAccessor(); + Coord ijk; + int &i = ijk[0], &j = ijk[1], &k = ijk[2], m = 1; + // Compute signed distances to torus using leapfrogging in k + for (i = r.begin(); i < r.end(); ++i) { + const auto x2 = Pow2(ValueT(i) - c[0]); + for (k = kmin; k <= kmax; ++k) { + const auto x2z2 = Pow2(Sqrt(Pow2(ValueT(k) - c[2]) + x2) - r1); + for (j = jmin; j <= jmax; j += m) { + m = 1; + const auto v = Sqrt(x2z2 + Pow2(ValueT(j) - c[1])) - r2; // Distance in voxel units + const auto d = v < 0 ? -v : v; + if (d < halfWidth) { // inside narrow band + acc.setValue(ijk, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; // kernel + +#ifdef NANOVDB_PARALLEL_PRIMITIVES + forEach(range, kernel); +#else + kernel(range); +#endif + + return grid; } // initTorus -template -std::shared_ptr> -initBox(ValueT width, // major radius of torus in world units - ValueT height, // minor radius of torus in world units - ValueT depth, - const Vec3& center, //center of box in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units +template +std::shared_ptr> +initBox(double width, // major radius of torus in world units + double height, // minor radius of torus in world units + double depth, + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // 
half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units { + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; static_assert(is_floating_point::value, "initBox: expect floating point"); - static_assert(is_floating_point::Type>::value, "initBox: expect floating point"); using Vec3T = Vec3; if (!(width > 0)) throw std::runtime_error("Box: width must be positive!"); @@ -638,15 +764,16 @@ initBox(ValueT width, // major radius of torus in world units if (!(halfWidth > 0)) throw std::runtime_error("Box: halfWidth must be positive!"); - auto builder = std::make_shared>(ValueT(halfWidth * voxelSize)); - auto acc = builder->getAccessor(); + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); // Define size of box with narrow-band in voxel units - const Vec3T r(width / (2 * ValueT(voxelSize)), height / (2 * ValueT(voxelSize)), depth / (2 * ValueT(voxelSize))); + const Vec3T r(width / (2 * ValueT(voxelSize)), + height / (2 * ValueT(voxelSize)), + depth / (2 * ValueT(voxelSize))); // Below the Nyquist frequency - if (r.min() < ValueT(1.5)) - return builder; + if (r.min() < ValueT(1.5)) return grid; // Define center of box in voxel units const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), @@ -660,47 +787,56 @@ initBox(ValueT width, // major radius of torus in world units // Define bounds of the voxel coordinates const BBox b(c - r - Vec3T(ValueT(halfWidth)), c + r + Vec3T(ValueT(halfWidth))); const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord(Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); // Compute signed distances to box using leapfrogging in k - int m = 1; - for (Coord p = bbox[0]; p[0] <= bbox[1][0]; ++p[0]) { - const auto q1 = Abs(ValueT(p[0]) - c[0]) - r[0]; - const auto x2 = Pow2(Pos(q1)); - for 
(p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const auto q2 = Abs(ValueT(p[1]) - c[1]) - r[1]; - const auto q0 = Max(q1, q2); - const auto x2y2 = x2 + Pow2(Pos(q2)); - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const auto q3 = Abs(ValueT(p[2]) - c[2]) - r[2]; - const auto v = Sqrt(x2y2 + Pow2(Pos(q3))) + Neg(Max(q0, q3)); // Distance in voxel units - const auto d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - - return builder; + auto kernel = [&](const Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const auto q1 = Abs(ValueT(p[0]) - c[0]) - r[0]; + const auto x2 = Pow2(Pos(q1)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const auto q2 = Abs(ValueT(p[1]) - c[1]) - r[1]; + const auto q0 = Max(q1, q2); + const auto x2y2 = x2 + Pow2(Pos(q2)); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const auto q3 = Abs(ValueT(p[2]) - c[2]) - r[2]; + const auto v = Sqrt(x2y2 + Pow2(Pos(q3))) + Neg(Max(q0, q3)); // Distance in voxel units + const auto d = Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; // kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + forEach(range, kernel); +#else + kernel(range); +#endif + return grid; } // initBox -template -std::shared_ptr> -initBBox(ValueT width, // width of the bbox in world units - ValueT height, // height of the bbox in world units - ValueT depth, // depth of the bbox in world units - ValueT thickness, // thickness of the 
wire in world units - const Vec3& center, //center of bbox in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units +template +std::shared_ptr> +initBBox(double width, // width of the bbox in world units + double height, // height of the bbox in world units + double depth, // depth of the bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units { + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; static_assert(is_floating_point::value, "initBBox: expect floating point"); - static_assert(is_floating_point::Type>::value, "initBBox: expect floating point"); using Vec3T = Vec3; if (!(width > 0)) throw std::runtime_error("BBox: width must be positive!"); @@ -713,16 +849,18 @@ initBBox(ValueT width, // width of the bbox in world units if (!(voxelSize > 0.0)) throw std::runtime_error("BBox: voxelSize must be positive!"); - auto builder = std::make_shared>(ValueT(halfWidth * voxelSize)); - auto acc = builder->getAccessor(); + + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); // Define size of bbox with narrow-band in voxel units - const Vec3T r(width / (2 * ValueT(voxelSize)), height / (2 * ValueT(voxelSize)), depth / (2 * ValueT(voxelSize))); + const Vec3T r(width / (2 * ValueT(voxelSize)), + height / (2 * ValueT(voxelSize)), + depth / (2 * ValueT(voxelSize))); const ValueT e = thickness / ValueT(voxelSize); // Below the Nyquist frequency - if (r.min() < ValueT(1.5) || e < ValueT(1.5)) - return builder; + if (r.min() < ValueT(1.5) || e < ValueT(1.5)) return grid; // Define center of bbox in voxel units const Vec3T 
c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), @@ -736,71 +874,78 @@ initBBox(ValueT width, // width of the bbox in world units // Define bounds of the voxel coordinates const BBox b(c - r - Vec3T(e + ValueT(halfWidth)), c + r + Vec3T(e + ValueT(halfWidth))); const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord(Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); // Compute signed distances to bbox using leapfrogging in k - int m = 1; - for (Coord p = bbox[0]; p[0] <= bbox[1][0]; ++p[0]) { - const ValueT px = Abs(ValueT(p[0]) - c[0]) - r[0]; - const ValueT qx = Abs(ValueT(px) + e) - e; - const ValueT px2 = Pow2(Pos(px)); - const ValueT qx2 = Pow2(Pos(qx)); - for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const ValueT py = Abs(ValueT(p[1]) - c[1]) - r[1]; - const ValueT qy = Abs(ValueT(py) + e) - e; - const ValueT qy2 = Pow2(Pos(qy)); - ; - const ValueT px2qy2 = px2 + qy2; - const ValueT qx2py2 = qx2 + Pow2(Pos(py)); - const ValueT qx2qy2 = qx2 + qy2; - const ValueT a[3] = {Max(px, qy), Max(qx, py), Max(qx, qy)}; - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const ValueT pz = Abs(ValueT(p[2]) - c[2]) - r[2]; - const ValueT qz = Abs(ValueT(pz) + e) - e; - const ValueT qz2 = Pow2(Pos(qz)); - const ValueT s1 = Sqrt(px2qy2 + qz2) + Neg(Max(a[0], qz)); - const ValueT s2 = Sqrt(qx2py2 + qz2) + Neg(Max(a[1], qz)); - const ValueT s3 = Sqrt(qx2qy2 + Pow2(Pos(pz))) + Neg(Max(a[2], pz)); - const ValueT v = Min(s1, Min(s2, s3)); // Distance in voxel units - const ValueT d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - - return builder; + auto kernel = [&](const Range<1,int> &ra) 
{ + auto acc = grid->getWriteAccessor(); + int m = 1; + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = Abs(ValueT(p[0]) - c[0]) - r[0]; + const ValueT qx = Abs(ValueT(px) + e) - e; + const ValueT px2 = Pow2(Pos(px)); + const ValueT qx2 = Pow2(Pos(qx)); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = Abs(ValueT(p[1]) - c[1]) - r[1]; + const ValueT qy = Abs(ValueT(py) + e) - e; + const ValueT qy2 = Pow2(Pos(qy)); + const ValueT px2qy2 = px2 + qy2; + const ValueT qx2py2 = qx2 + Pow2(Pos(py)); + const ValueT qx2qy2 = qx2 + qy2; + const ValueT a[3] = {Max(px, qy), Max(qx, py), Max(qx, qy)}; + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = Abs(ValueT(p[2]) - c[2]) - r[2]; + const ValueT qz = Abs(ValueT(pz) + e) - e; + const ValueT qz2 = Pow2(Pos(qz)); + const ValueT s1 = Sqrt(px2qy2 + qz2) + Neg(Max(a[0], qz)); + const ValueT s2 = Sqrt(qx2py2 + qz2) + Neg(Max(a[1], qz)); + const ValueT s3 = Sqrt(qx2qy2 + Pow2(Pos(pz))) + Neg(Max(a[2], pz)); + const ValueT v = Min(s1, Min(s2, s3)); // Distance in voxel units + const ValueT d = Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + }; //kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + forEach(range, kernel); +#else + kernel(range); +#endif + + return grid; } // initBBox -template -std::shared_ptr> -initOctahedron(ValueT scale, // scale of the octahedron in world units - const Vec3& center, //center of octahedron in world units - double voxelSize, // size of a voxel in world units - double halfWidth, // half-width of narrow band in voxel units - const Vec3d& origin) // origin of grid in world units +template +std::shared_ptr> +initOctahedron(double scale, // scale of the octahedron in world units + 
const Vec3d& center, // center of octahedron in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin) // origin of grid in world units { - static_assert(is_floating_point::value, "initOctahedron: expect floating point"); - static_assert(is_floating_point::Type>::value, "initOctahedron: expect floating point"); + using GridT = build::Grid; + using ValueT = typename BuildToValueMap::type; using Vec3T = Vec3; - if (!(scale > 0)) - throw std::runtime_error("Octahedron: width must be positive!"); - if (!(voxelSize > 0)) - throw std::runtime_error("Octahedron: voxelSize must be positive!"); + static_assert(is_floating_point::value, "initOctahedron: expect floating point"); + + if (!(scale > 0)) throw std::runtime_error("Octahedron: width must be positive!"); + if (!(voxelSize > 0)) throw std::runtime_error("Octahedron: voxelSize must be positive!"); - auto builder = std::make_shared>(halfWidth * voxelSize); - auto acc = builder->getAccessor(); + auto grid = std::make_shared(ValueT(halfWidth * voxelSize)); + grid->setTransform(voxelSize, origin); // Define size of octahedron with narrow-band in voxel units const ValueT s = scale / (2 * ValueT(voxelSize)); // Below the Nyquist frequency - if ( s < ValueT(1.5) ) - return builder; + if ( s < ValueT(1.5) ) return grid; // Define center of octahedron in voxel units const Vec3T c(ValueT(center[0] - origin[0]) / ValueT(voxelSize), @@ -821,50 +966,117 @@ initOctahedron(ValueT scale, // scale of the octahedron in world un // Define bounds of the voxel coordinates const BBox b(c - Vec3T(s + ValueT(halfWidth)), c + Vec3T(s + ValueT(halfWidth))); const CoordBBox bbox(Coord(Floor(b[0][0]), Floor(b[0][1]), Floor(b[0][2])), - Coord(Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + Coord( Ceil(b[1][0]), Ceil(b[1][1]), Ceil(b[1][2]))); + const Range<1,int> range(bbox[0][0], bbox[1][0]+1, 32); // Compute signed distances to octahedron using 
leapfrogging in k - int m = 1; - static const ValueT a = Sqrt(ValueT(1)/ValueT(3)); - for (Coord p = bbox[0]; p[0] <= bbox[1][0]; ++p[0]) { - const ValueT px = Abs(ValueT(p[0]) - c[0]); - for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { - const ValueT py = Abs(ValueT(p[1]) - c[1]); - for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { - m = 1; - const ValueT pz = Abs(ValueT(p[2]) - c[2]); - ValueT d = px + py + pz - s; - ValueT v; - if (ValueT(3)*px < d) { - v = sdf(px, py, pz); - } else if (ValueT(3)*py < d) { - v = sdf(py, pz, px); - } else if (ValueT(3)*pz < d) { - v = sdf(pz, px, py); - } else { - v = a * d; - } - d = Abs(v); - if (d < halfWidth) { // inside narrow band - acc.setValue(p, ValueT(voxelSize) * v); // distance in world units - } else { // outside narrow band - m += Floor(d - halfWidth); // leapfrog - } - } //end leapfrog over k - } //end loop over j - } //end loop over i - - return builder; + auto kernel = [&](const Range<1,int> &ra) { + auto acc = grid->getWriteAccessor(); + int m = 1; + static const ValueT a = Sqrt(ValueT(1)/ValueT(3)); + for (Coord p(ra.begin(),bbox[0][1],bbox[0][2]); p[0] < ra.end(); ++p[0]) { + const ValueT px = Abs(ValueT(p[0]) - c[0]); + for (p[1] = bbox[0][1]; p[1] <= bbox[1][1]; ++p[1]) { + const ValueT py = Abs(ValueT(p[1]) - c[1]); + for (p[2] = bbox[0][2]; p[2] <= bbox[1][2]; p[2] += m) { + m = 1; + const ValueT pz = Abs(ValueT(p[2]) - c[2]); + ValueT d = px + py + pz - s; + ValueT v; + if (ValueT(3)*px < d) { + v = sdf(px, py, pz); + } else if (ValueT(3)*py < d) { + v = sdf(py, pz, px); + } else if (ValueT(3)*pz < d) { + v = sdf(pz, px, py); + } else { + v = a * d; + } + d = Abs(v); + if (d < halfWidth) { // inside narrow band + acc.setValue(p, ValueT(voxelSize) * v); // distance in world units + } else { // outside narrow band + m += Floor(d - halfWidth); // leapfrog + } + } //end leapfrog over k + } //end loop over j + } //end loop over i + };// kernel +#ifdef NANOVDB_PARALLEL_PRIMITIVES + forEach(range, 
kernel); +#else + kernel(range); +#endif + return grid; } // initOctahedron } // unnamed namespace //================================================================================================ -template -inline GridHandle -createLevelSetSphere(ValueT radius, // radius of sphere in world units - const Vec3& center, //center of sphere in world units +template +typename enable_if::value || + is_same::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename enable_if::value || + is_same::value || + is_same::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + 
bool ditherOn, + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetSphere + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createLevelSetSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -875,23 +1087,55 @@ createLevelSetSphere(ValueT radius, // radius of sphere in world un bool ditherOn, const BufferT& buffer) { - auto builder = initSphere(radius, center, voxelSize, halfWidth, origin); - builder->sdfToLevelSet(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createLevelSetSphere +} // createLevelSetSphere //================================================================================================ -template -inline GridHandle 
-createFogVolumeSphere(ValueT radius, // radius of sphere in world units - const Vec3& center, //center of sphere in world units +template +typename disable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeSphere + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createFogVolumeSphere(double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -902,24 +1146,29 @@ createFogVolumeSphere(ValueT radius, // radius of sphere in world u bool ditherOn, const BufferT& buffer) { - auto builder = initSphere(radius, center, voxelSize, halfWidth, origin); - builder->sdfToFog(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initSphere(radius, center, voxelSize, halfWidth, 
origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createFogVolumeSphere +} // createFogVolumeSphere //================================================================================================ -template -inline GridHandle -createPointSphere(int pointsPerVoxel, // half-width of narrow band in voxel units - ValueT radius, // radius of sphere in world units - const Vec3& center, //center of sphere in world units +template +typename disable_if::value, GridHandle>::type +createPointSphere(int pointsPerVoxel, // number of points to be scattered in each active voxel + double radius, // radius of sphere in world units + const Vec3d& center, // center of sphere in world units double voxelSize, // size of a voxel in world units const Vec3d& origin, // origin of grid in world units const std::string& name, // name of grid @@ -927,9 +1176,9 @@ createPointSphere(int pointsPerVoxel, // half-width of narrow ba const BufferT& buffer) { auto sphereHandle = createLevelSetSphere(radius, center, voxelSize, 0.5, origin, "dummy", - StatsMode::BBox, ChecksumMode::Disable, -1.0f, false, buffer); + StatsMode::BBox, ChecksumMode::Disable, buffer); assert(sphereHandle); - auto* sphereGrid = sphereHandle.template grid(); + auto* sphereGrid = sphereHandle.template grid(); assert(sphereGrid); auto pointHandle = createPointScatter(*sphereGrid, pointsPerVoxel, name, cMode, buffer); assert(pointHandle); @@ -938,11 +1187,39 @@ createPointSphere(int pointsPerVoxel, // half-width of narrow ba 
//================================================================================================ -template -inline GridHandle -createLevelSetTorus(ValueT majorRadius, // major radius of torus in world units - ValueT minorRadius, // minor radius of torus in world units - const Vec3& center, //center of torus in world units +template +typename disable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetTorus + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createLevelSetTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -953,24 +1230,57 @@ createLevelSetTorus(ValueT majorRadius, // major radius of torus in bool ditherOn, 
const BufferT& buffer) { - auto builder = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - builder->sdfToLevelSet(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createLevelSetTorus +} // createLevelSetTorus //================================================================================================ -template -inline GridHandle -createFogVolumeTorus(ValueT majorRadius, // major radius of torus in world units - ValueT minorRadius, // minor radius of torus in world units - const Vec3& center, //center of torus in world units +template +typename disable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + 
build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeTorus + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createFogVolumeTorus(double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -981,25 +1291,30 @@ createFogVolumeTorus(ValueT majorRadius, // major radius of torus i bool ditherOn, const BufferT& buffer) { - auto builder = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); - builder->sdfToFog(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initTorus(majorRadius, minorRadius, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createFogVolumeTorus +} // createFogVolumeTorus //================================================================================================ -template -inline GridHandle -createPointTorus(int pointsPerVoxel, // half-width of narrow band in voxel units - ValueT majorRadius, // major radius 
of torus in world units - ValueT minorRadius, // minor radius of torus in world units - const Vec3& center, //center of torus in world units +template +typename disable_if::value, GridHandle>::type +createPointTorus(int pointsPerVoxel, // number of points to be scattered in each active voxel + double majorRadius, // major radius of torus in world units + double minorRadius, // minor radius of torus in world units + const Vec3d& center, // center of torus in world units double voxelSize, // size of a voxel in world units const Vec3d& origin, // origin of grid in world units const std::string& name, // name of grid @@ -1007,23 +1322,52 @@ createPointTorus(int pointsPerVoxel, // half-width of narrow ban const BufferT& buffer) { auto torusHandle = createLevelSetTorus(majorRadius, minorRadius, center, voxelSize, 0.5f, origin, - "dummy", StatsMode::BBox, ChecksumMode::Disable, -1.0f, false, buffer); + "dummy", StatsMode::BBox, ChecksumMode::Disable, buffer); assert(torusHandle); - auto* torusGrid = torusHandle.template grid(); + auto* torusGrid = torusHandle.template grid(); assert(torusGrid); auto pointHandle = createPointScatter(*torusGrid, pointsPerVoxel, name, cMode, buffer); assert(pointHandle); return pointHandle; -} // createPointTorus +} // createPointTorus //================================================================================================ -template -inline GridHandle -createLevelSetBox(ValueT width, // width of box in world units - ValueT height, // height of box in world units - ValueT depth, // depth of box in world units - const Vec3& center, //center of box in world units +template +typename disable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in 
voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBox + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createLevelSetBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -1034,23 +1378,54 @@ createLevelSetBox(ValueT width, // width of box in world units bool ditherOn, const BufferT& buffer) { - auto builder = initBox(width, height, depth, center, voxelSize, halfWidth, origin); - builder->sdfToLevelSet(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto 
handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createLevelSetBox +} // createLevelSetBox //================================================================================================ -template -inline GridHandle -createLevelSetOctahedron(ValueT scale, // scale of the octahedron in world units - const Vec3& center, //center of box in world units +template +typename disable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetOctahedron + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createLevelSetOctahedron(double scale, // scale of the octahedron in world units + const Vec3d& center, // center of box in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -1061,26 +1436,60 @@ createLevelSetOctahedron(ValueT scale, // scale of the octahedron i bool ditherOn, const BufferT& buffer) { - auto builder = initOctahedron(scale, 
center, voxelSize, halfWidth, origin); - builder->sdfToLevelSet(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createLevelSetOctahedron +} // createLevelSetOctahedron //================================================================================================ -template -inline GridHandle -createLevelSetBBox(ValueT width, // width of bbox in world units - ValueT height, // height of bbox in world units - ValueT depth, // depth of bbox in world units - ValueT thickness, // thickness of the wire in world units - const Vec3& center, //center of bbox in world units +template +typename disable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + 
grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createLevelSetBBox + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createLevelSetBBox(double width, // width of bbox in world units + double height, // height of bbox in world units + double depth, // depth of bbox in world units + double thickness, // thickness of the wire in world units + const Vec3d& center, // center of bbox in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -1091,25 +1500,59 @@ createLevelSetBBox(ValueT width, // width of bbox in world units bool ditherOn, const BufferT& buffer) { - auto builder = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); - builder->sdfToLevelSet(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initBBox(width, height, depth, thickness, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createLevelSetBBox +} // createLevelSetBBox //================================================================================================ -template -inline GridHandle 
-createFogVolumeBox(ValueT width, // width of box in world units - ValueT height, // height of box in world units - ValueT depth, // depth of box in world units - const Vec3& center, //center of box in world units +template +typename disable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeBox + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createFogVolumeBox(double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -1120,23 +1563,56 @@ createFogVolumeBox(ValueT width, // width of box in world units bool ditherOn, const BufferT& buffer) { - auto builder = 
initBox(width, height, depth, center, voxelSize, halfWidth, origin); - builder->sdfToFog(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initBox(width, height, depth, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createFogVolumeBox +} // createFogVolumeBox //================================================================================================ -template -inline GridHandle -createFogVolumeOctahedron(ValueT scale, // scale of octahedron in world units - const Vec3& center, //center of box in world units +template +typename disable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale, // scale of octahedron in world units + const Vec3d& center, // center of box in world units + double voxelSize, // size of a voxel in world units + double halfWidth, // half-width of narrow band in voxel units + const Vec3d& origin, // origin of grid in world units + const std::string& name, // name of grid + StatsMode sMode, // mode of computation for the statistics + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) +{ + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + auto handle = converter.template 
getHandle(buffer); + assert(handle); + return handle; +} // createFogVolumeOctahedron + +//================================================================================================ + +template +typename enable_if::value, GridHandle>::type +createFogVolumeOctahedron(double scale, // scale of octahedron in world units + const Vec3d& center, // center of box in world units double voxelSize, // size of a voxel in world units double halfWidth, // half-width of narrow band in voxel units const Vec3d& origin, // origin of grid in world units @@ -1147,26 +1623,31 @@ createFogVolumeOctahedron(ValueT scale, // scale of octahedron in w bool ditherOn, const BufferT& buffer) { - auto builder = initOctahedron(scale, center, voxelSize, halfWidth, origin); - builder->sdfToFog(); - builder->setStats(sMode); - builder->setChecksum(cMode); - builder->enableDithering(ditherOn); + using GridT = build::Grid; + auto grid = initOctahedron(scale, center, voxelSize, halfWidth, origin); + grid->mName = name; + build::NodeManager mgr(*grid); + build::sdfToLevelSet(mgr); + build::levelSetToFog(mgr, false); + CreateNanoGrid converter(*grid); + converter.setStats(sMode); + converter.setChecksum(cMode); + converter.enableDithering(ditherOn); AbsDiff oracle(tolerance); - auto handle = builder->template getHandle(voxelSize, origin, name, oracle, buffer); + auto handle = converter.template getHandle(oracle, buffer); assert(handle); return handle; -} // createFogVolumeOctahedron +} // createFogVolumeOctahedron //================================================================================================ -template -inline GridHandle -createPointBox(int pointsPerVoxel, // half-width of narrow band in voxel units - ValueT width, // width of box in world units - ValueT height, // height of box in world units - ValueT depth, // depth of box in world units - const Vec3& center, //center of box in world units +template +typename disable_if::value, GridHandle>::type +createPointBox(int 
pointsPerVoxel, // number of points to be scattered in each active voxel + double width, // width of box in world units + double height, // height of box in world units + double depth, // depth of box in world units + const Vec3d& center, // center of box in world units double voxelSize, // size of a voxel in world units const Vec3d& origin, // origin of grid in world units const std::string& name, // name of grid @@ -1174,26 +1655,26 @@ createPointBox(int pointsPerVoxel, // half-width of narrow band const BufferT& buffer) { auto boxHandle = createLevelSetBox(width, height, depth, center, voxelSize, 0.5, origin, "dummy", - StatsMode::BBox, ChecksumMode::Disable, -1.0f, false, buffer); + StatsMode::BBox, ChecksumMode::Disable, buffer); assert(boxHandle); - auto* boxGrid = boxHandle.template grid(); + auto* boxGrid = boxHandle.template grid(); assert(boxGrid); auto pointHandle = createPointScatter(*boxGrid, pointsPerVoxel, name, cMode, buffer); assert(pointHandle); return pointHandle; - -} // createPointBox +} // createPointBox //================================================================================================ -template +template inline GridHandle -createPointScatter(const NanoGrid& srcGrid, // origin of grid in world units - int pointsPerVoxel, // half-width of narrow band in voxel units - const std::string& name, // name of grid - ChecksumMode cMode, // mode of computation for the checksum - const BufferT& buffer) +createPointScatter(const NanoGrid& srcGrid, // origin of grid in world units + int pointsPerVoxel, // number of points to be scattered in each active voxel + const std::string& name, // name of grid + ChecksumMode cMode, // mode of computation for the checksum + const BufferT& buffer) { + using ValueT = typename BuildToValueMap::type; static_assert(is_floating_point::value, "createPointScatter: expect floating point"); using Vec3T = Vec3; if (pointsPerVoxel < 1) { @@ -1206,80 +1687,65 @@ createPointScatter(const NanoGrid& srcGrid, // 
origin of grid in world u throw std::runtime_error("createPointScatter: ActiveVoxelCount is required"); } const uint64_t pointCount = pointsPerVoxel * srcGrid.activeVoxelCount(); - const uint64_t pointSize = AlignUp(pointCount * sizeof(Vec3T)); if (pointCount == 0) { throw std::runtime_error("createPointScatter: No particles to scatter"); } std::vector xyz; xyz.reserve(pointCount); - GridBuilder builder(std::numeric_limits::max(), GridClass::PointData, pointSize); - auto dstAcc = builder.getAccessor(); + using DstGridT = build::Grid; + DstGridT dstGrid(std::numeric_limits::max(), name, GridClass::PointData); + dstGrid.mMap = srcGrid.map(); + auto dstAcc = dstGrid.getAccessor(); std::srand(1234); const ValueT s = 1 / (1 + ValueT(RAND_MAX)); // scale so s*rand() is in ] 0, 1 [ // return a point with random local voxel coordinates (-0.5 to +0.5) - auto randomPoint = [&s]() { - return s * Vec3T(rand(), rand(), rand()) - Vec3T(0.5); - }; + auto randomPoint = [&s](){return s * Vec3T(rand(), rand(), rand()) - Vec3T(0.5);}; const auto& srcTree = srcGrid.tree(); auto srcMgrHandle = createNodeManager(srcGrid); - auto *srcMgr = srcMgrHandle.template mgr(); + auto *srcMgr = srcMgrHandle.template mgr(); assert(srcMgr); for (uint32_t i = 0, end = srcTree.nodeCount(0); i < end; ++i) { - auto& srcLeaf = srcMgr->leaf(i);; + auto& srcLeaf = srcMgr->leaf(i); auto* dstLeaf = dstAcc.setValue(srcLeaf.origin(), pointsPerVoxel); // allocates leaf node dstLeaf->mValueMask = srcLeaf.valueMask(); for (uint32_t j = 0, m = 0; j < 512; ++j) { if (dstLeaf->mValueMask.isOn(j)) { - for (int n = 0; n < pointsPerVoxel; ++n, ++m) { - xyz.push_back(randomPoint()); - } - } + const Vec3f ijk = dstLeaf->offsetToGlobalCoord(j).asVec3s();// floating-point representation of index coordinates + for (int n = 0; n < pointsPerVoxel; ++n) xyz.push_back(srcGrid.indexToWorld(randomPoint() + ijk)); + m += pointsPerVoxel; + }// active voxels dstLeaf->mValues[j] = m; - } - } + }// loop over all voxels + }// loop
over leaf nodes assert(pointCount == xyz.size()); - builder.setStats(StatsMode::MinMax); - builder.setChecksum(ChecksumMode::Disable); - const AbsDiff dummy; - auto handle = builder.template getHandle(srcGrid.map(), name, dummy, buffer); + CreateNanoGrid converter(dstGrid); + converter.setStats(StatsMode::MinMax); + converter.setChecksum(ChecksumMode::Disable); + + converter.addBlindData(name, + GridBlindDataSemantic::WorldCoords, + GridBlindDataClass::AttributeArray, + mapToGridType(), + pointCount, + sizeof(Vec3T)); + auto handle = converter.template getHandle(buffer); assert(handle); - auto* dstGrid = handle.template grid(); - assert(dstGrid && dstGrid->template isSequential<0>()); - auto& dstTree = dstGrid->tree(); - if (dstTree.nodeCount(0) == 0) { - throw std::runtime_error("Expect leaf nodes!"); - } - auto *leafData = dstTree.getFirstLeaf()->data(); + + auto* grid = handle.template grid(); + assert(grid && grid->template isSequential<0>()); + auto &tree = grid->tree(); + if (tree.nodeCount(0) == 0) throw std::runtime_error("Expect leaf nodes!"); + auto *leafData = tree.getFirstLeaf()->data(); leafData[0].mMinimum = 0; // start of prefix sum - for (uint32_t i = 1, n = dstTree.nodeCount(0); i < n; ++i) { + for (uint32_t i = 1, n = tree.nodeCount(0); i < n; ++i) { leafData[i].mMinimum = leafData[i - 1].mMinimum + leafData[i - 1].mMaximum; } - auto& meta = const_cast(dstGrid->blindMetaData(0u)); - - meta.mElementCount = xyz.size(); - meta.mFlags = 0; - meta.mDataClass = GridBlindDataClass::AttributeArray; - meta.mSemantic = GridBlindDataSemantic::PointPosition; - if (name.length() + 1 > GridBlindMetaData::MaxNameSize) { - std::stringstream ss; - ss << "Point attribute name \"" << name << "\" is more then " - << nanovdb::GridBlindMetaData::MaxNameSize << " characters"; - throw std::runtime_error(ss.str()); - } - memcpy(meta.mName, name.c_str(), name.size() + 1); - if (std::is_same::value) { // resolved at compiletime - meta.mDataType = GridType::Vec3f; - } else 
if (std::is_same::value) { - meta.mDataType = GridType::Vec3d; - } else { - throw std::runtime_error("Unsupported value type"); - } - if (const auto *p = dstGrid->blindData(0)) { - memcpy(const_cast(p), xyz.data(), xyz.size() * sizeof(Vec3T)); + if (Vec3T *blindData = grid->template getBlindData(0)) { + memcpy(blindData, xyz.data(), xyz.size() * sizeof(Vec3T)); } else { throw std::runtime_error("Blind data pointer was NULL"); } - updateChecksum(*dstGrid, cMode); + updateChecksum(*grid, cMode); return handle; } // createPointScatter diff --git a/nanovdb/nanovdb/util/Ray.h b/nanovdb/nanovdb/util/Ray.h index 2597e3f030..62d6ff51a0 100644 --- a/nanovdb/nanovdb/util/Ray.h +++ b/nanovdb/nanovdb/util/Ray.h @@ -474,22 +474,16 @@ class Ray // Vec3T(_bbox[1][0]+1e-4,_bbox[1][1]+1e-4,_bbox[1][2]+1e-4)); RealT t0 = (bbox[mSign[0]][0] - mEye[0]) * mInvDir[0]; RealT t2 = (bbox[1 - mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t0 > t2) - return false; + if (t0 > t2) return false; RealT t1 = (bbox[1 - mSign[0]][0] - mEye[0]) * mInvDir[0]; RealT t3 = (bbox[mSign[1]][1] - mEye[1]) * mInvDir[1]; - if (t3 > t1) - return false; - if (t3 > t0) - t0 = t3; - if (t2 < t1) - t1 = t2; + if (t3 > t1) return false; + if (t3 > t0) t0 = t3; + if (t2 < t1) t1 = t2; t3 = (bbox[mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t3 > t1) - return false; + if (t3 > t1) return false; t2 = (bbox[1 - mSign[2]][2] - mEye[2]) * mInvDir[2]; - if (t0 > t2) - return false; + if (t0 > t2) return false; //if (t3 > t0) t0 = t3; //if (mTimeSpan.t1 < t0) return false; //if (t2 < t1) t1 = t2; diff --git a/nanovdb/nanovdb/util/Stencils.h b/nanovdb/nanovdb/util/Stencils.h index e56d683069..88e943f4ff 100644 --- a/nanovdb/nanovdb/util/Stencils.h +++ b/nanovdb/nanovdb/util/Stencils.h @@ -15,7 +15,7 @@ #ifndef NANOVDB_STENCILS_HAS_BEEN_INCLUDED #define NANOVDB_STENCILS_HAS_BEEN_INCLUDED -#include "../NanoVDB.h"// for __hosedev__, Vec3, Min, Max, Pow2, Pow3, Pow4 +#include // for __hostdev__, Vec3, Min, Max, Pow2, Pow3, Pow4 
namespace nanovdb { diff --git a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.h b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.h new file mode 100644 index 0000000000..de46595a24 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.h @@ -0,0 +1,94 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#ifndef NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED + +#include +#include "CudaDeviceBuffer.h" +#include +#include + +#include // for std::strcpy + +namespace nanovdb { + +/// @brief This function appends blind data to an existing NanoGrid +template +GridHandle +cudaAddBlindData(const NanoGrid *d_grid, + const BlindDataT *d_blindData, + uint64_t valueCount, + GridBlindDataClass blindClass = GridBlindDataClass::Unknown, + GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, + const char *name = "", + const BufferT &pool = BufferT()) +{ + // In: |-----------|--------- |-----------| + // old grid old meta old data + // Out: |-----------|----------|----------|-----------|------------| + // old grid old meta new meta old data new data + + static_assert(BufferTraits::hasDeviceDual, "Expected BufferT to support device allocation"); + + // extract byte sizes of the grid, blind meta data and blind data + enum {GRID=0, META=1, DATA=2}; + uint64_t tmp[3], *d_tmp; + cudaCheck(cudaMalloc((void**)&d_tmp, 3*sizeof(uint64_t))); + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t) { + if (auto count = d_grid->blindDataCount()) { + d_tmp[GRID] = PtrDiff(&d_grid->blindMetaData(0), d_grid); + d_tmp[META] = count*sizeof(GridBlindMetaData); + d_tmp[DATA] = d_grid->gridSize() - d_tmp[GRID] - d_tmp[META]; + } else { + d_tmp[GRID] = d_grid->gridSize(); + d_tmp[META] = d_tmp[DATA] = 0u; + } + }); cudaCheckError(); + cudaCheck(cudaMemcpy(&tmp, d_tmp, 3*sizeof(uint64_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_tmp)); + + GridBlindMetaData
metaData{int64_t(sizeof(GridBlindMetaData) + tmp[DATA]), valueCount, + sizeof(BlindDataT), semantics, blindClass, mapToGridType()}; + std::strcpy(metaData.mName, name); + auto buffer = BufferT::create(tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + metaData.blindDataSize(), &pool, false); + auto d_data = buffer.deviceData(); + + // 1: |-----------|----------| + // old grid old meta + cudaCheck(cudaMemcpy(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice)); + + // 2: |-----------|----------|----------| + // old grid old meta new meta + cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice)); + + // 3: |-----------|----------|----------|-----------| + // old grid old meta new meta old data + cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), + (const char*)d_grid + tmp[GRID] + tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice)); + + // 4: |-----------|----------|----------|-----------|------------| + // old grid old meta new meta old data new data + const size_t dataSize = valueCount*sizeof(BlindDataT);// no padding + cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], + d_blindData, dataSize, cudaMemcpyDeviceToDevice)); + if (auto padding = metaData.blindDataSize() - dataSize) {// zero out possible padding + cudaCheck(cudaMemset(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding)); + } + + // increment grid size and blind data counter in output grid + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t) { + auto &grid = *reinterpret_cast*>(d_data); + grid.mBlindMetadataCount += 1; + grid.mBlindMetadataOffset = tmp[GRID];// this is undefined if input grid has no blind data + auto *meta = PtrAdd(d_data, grid.mBlindMetadataOffset);// points to first blind meta data + for (uint32_t i=0, n=grid.mBlindMetadataCount-1; imDataOffset += sizeof(GridBlindMetaData); + 
grid.mGridSize += sizeof(GridBlindMetaData) + meta->blindDataSize();// expansion with 32 byte alignment + }); cudaCheckError(); + + return GridHandle(std::move(buffer)); +}// cudaAddBlindData + +}// nanovdb namespace + +#endif // NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h new file mode 100644 index 0000000000..dc96ed0993 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h @@ -0,0 +1,185 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CudaDeviceBuffer.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Implements a simple dual (host/device) CUDA buffer +*/ + +#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED + +#include "../HostBuffer.h" // for BufferTraits +#include "CudaUtils.h"// for cudaMalloc/cudaMallocManaged/cudaFree + +namespace nanovdb { + +// ----------------------------> CudaDeviceBuffer <-------------------------------------- + +/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC. +/// Obviously this class is making explicit use of CUDA so replace it with your own memory +/// allocator if you are not using CUDA. +/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device +/// it is significantly slower than cached (un-pinned) memory on the host.
+class CudaDeviceBuffer +{ + uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) + uint8_t *mCpuData, *mGpuData; // raw pointers to the host and device buffers + +public: + /// @brief Static factory method that return an instance of this buffer + /// @param size byte size of buffer to be initialized + /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @return An instance of this class using move semantics + static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* dummy = nullptr, bool host = true); + + /// @brief Constructor + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + CudaDeviceBuffer(uint64_t size = 0, bool host = true) + : mSize(0) + , mCpuData(nullptr) + , mGpuData(nullptr) + { + if (size > 0) this->init(size, host); + } + + /// @brief Disallow copy-construction + CudaDeviceBuffer(const CudaDeviceBuffer&) = delete; + + /// @brief Move copy-constructor + CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept + : mSize(other.mSize) + , mCpuData(other.mCpuData) + , mGpuData(other.mGpuData) + { + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + } + + /// @brief Disallow copy assignment operation + CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete; + + /// @brief Move copy assignment operation + CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept + { + this->clear(); + mSize = other.mSize; + mCpuData = other.mCpuData; + mGpuData = other.mGpuData; + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + return *this; + } + + /// @brief Destructor frees memory on both the host and device + ~CudaDeviceBuffer() { this->clear(); }; + + /// @brief Initialize buffer + /// @param size byte 
size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @note All existing buffers are first cleared + /// @warning size is expected to be non-zero. Use clear() to clear the buffer! + void init(uint64_t size, bool host = true); + + /// @brief Returns a raw pointer to the host/CPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + uint8_t* data() const { return mCpuData; } + + /// @brief Returns a raw pointer to the device/GPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + uint8_t* deviceData() const { return mGpuData; } + + /// @brief Upload this buffer from the host to the device, i.e. CPU -> GPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the device/GPU buffer does not exist it is first allocated + /// @warning Assumes that the host/CPU buffer already exists + void deviceUpload(void* stream = nullptr, bool sync = true) const; + + /// @brief Download this buffer from the device to the host, i.e. GPU -> CPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the host/CPU buffer does not exist it is first allocated + /// @warning Assumes that the device/GPU buffer already exists + void deviceDownload(void* stream = nullptr, bool sync = true) const; + + /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. + uint64_t size() const { return mSize; } + + /// @brief Returns true if this allocator is empty, i.e.
has no allocated memory + bool empty() const { return mSize == 0; } + + /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL + void clear(); + +}; // CudaDeviceBuffer class + +template<> +struct BufferTraits +{ + static constexpr bool hasDeviceDual = true; +}; + +// --------------------------> Implementations below <------------------------------------ + +inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*, bool host) +{ + return CudaDeviceBuffer(size, host); +} + +inline void CudaDeviceBuffer::init(uint64_t size, bool host) +{ + if (mSize>0) this->clear(); + NANOVDB_ASSERT(size > 0); + if (host) { + cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + checkPtr(mCpuData, "CudaDeviceBuffer::init: failed to allocate host buffer"); + } else { + cudaCheck(cudaMalloc((void**)&mGpuData, size)); // un-managed memory on the device, always 32B aligned! + checkPtr(mGpuData, "CudaDeviceBuffer::init: failed to allocate device buffer"); + } + mSize = size; +} // CudaDeviceBuffer::init + +inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const +{ + checkPtr(mCpuData, "uninitialized cpu data"); + if (mGpuData == nullptr) { + cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device, always 32B aligned! + } + checkPtr(mGpuData, "uninitialized gpu data"); + cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // CudaDeviceBuffer::gpuUpload + +inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const +{ + checkPtr(mGpuData, "uninitialized gpu data"); + if (mCpuData == nullptr) { + cudaCheck(cudaMallocHost((void**)&mCpuData, mSize)); // un-managed pinned memory on the host (can be slow to access!). 
Always 32B aligned + } + checkPtr(mCpuData, "uninitialized cpu data"); + cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // CudaDeviceBuffer::deviceDownload + +inline void CudaDeviceBuffer::clear() +{ + if (mGpuData) cudaCheck(cudaFree(mGpuData)); + if (mCpuData) cudaCheck(cudaFreeHost(mCpuData)); + mCpuData = mGpuData = nullptr; + mSize = 0; +} // CudaDeviceBuffer::clear + +} // namespace nanovdb + +#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h new file mode 100644 index 0000000000..2dda21e944 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h @@ -0,0 +1,370 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CudaIndexToGrid.h + + \author Ken Museth + + \date April 17, 2023 + + \brief Combines an IndexGrid and values into a regular Grid on the device +*/ + +#ifndef NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED + +#include +#include "CudaDeviceBuffer.h" +#include +#include +#include + +namespace nanovdb { + +// cudaIndexToGrid + +/// @brief Freestanding function that combines an IndexGrid and values into a regular Grid +/// @tparam DstBuildT Build type of the destination/output Grid +/// @tparam SrcBuildT Build type of the source/input IndexGrid +/// @tparam BufferT Type of the buffer used for allocation of the destination Grid +/// @param d_srcGrid Device pointer to source/input IndexGrid, i.e.
SrcBuildT={ValueIndex,ValueOnIndex,ValueIndexMask,ValueOnIndexMask} +/// @param d_srcValues Device pointer to an array of values +/// @param pool Memory pool used to create a buffer for the destination/output Grid +/// @note If d_srcGrid has stats (min,max,avg,std-dev), then d_srcValues is also assumed +/// to have the same information, all of which are then copied to the destination/output grid. +/// An exception to this rule is if the type of d_srcValues is different from the stats type +/// NanoRoot::FloatType, e.g. if DstBuildT=Vec3f then NanoRoot::FloatType=float, +/// in which case average and standard-deviation are undefined in the output grid. +/// @return Handle to the destination/output grid +template +typename enable_if::is_index, GridHandle>::type +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT()); + + +template +typename enable_if::is_index, GridHandle>::type +cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT()) +{ + return cudaIndexToGrid(d_srcGrid, d_srcValues, pool); +} + +namespace {// anonymous namespace + +template +class CudaIndexToGrid +{ + using SrcGridT = NanoGrid; +public: + struct NodeAccessor; + + /// @brief Constructor from a source IndexGrid + /// @param d_srcGrid Device pointer to IndexGrid used as the source + CudaIndexToGrid(const SrcGridT *d_srcGrid); + + ~CudaIndexToGrid() {cudaCheck(cudaFree(mDevNodeAcc));} + + /// @brief Toggle on and off verbose mode + /// @param on if true verbose is turned on + void setVerbose(bool on = true) {mVerbose = on; } + + /// @brief Set the name of the destination/output grid + /// @param name Name used for the destination grid + void setGridName(const std::string &name) {mGridName = name;} + + template + GridHandle getHandle(const typename BuildToValueMap::type *srcValues, const BufferT &buffer = BufferT()); + +private: + GpuTimer mTimer; + std::string mGridName; + bool mVerbose{true}; +
NodeAccessor mNodeAcc, *mDevNodeAcc; + + template + BufferT getBuffer(const BufferT &pool); +};// CudaIndexToGrid + +//================================================================================================ + +template +struct CudaIndexToGrid::NodeAccessor +{ + uint64_t grid, tree, root, node[3], meta, blind, size;// byte offsets, node: 0=leaf,1=lower, 2=upper + const SrcGridT *d_srcGrid;// device point to source IndexGrid + void *d_dstPtr;// device pointer to buffer with destination Grid + char *d_gridName; + uint32_t nodeCount[4];// 0=leaf, 1=lower, 2=upper, 3=root tiles + + __device__ const NanoGrid& srcGrid() const {return *d_srcGrid;} + __device__ const NanoTree& srcTree() const {return d_srcGrid->tree();} + __device__ const NanoRoot& srcRoot() const {return d_srcGrid->tree().root();} + template + __device__ const typename NanoNode::type& srcNode(int i) const { + return *(this->srcTree().template getFirstNode() + i); + } + + template + __device__ NanoGrid& dstGrid() const {return *PtrAdd>(d_dstPtr, grid);} + template + __device__ NanoTree& dstTree() const {return *PtrAdd>(d_dstPtr, tree);} + template + __device__ NanoRoot& dstRoot() const {return *PtrAdd>(d_dstPtr, root);} + template + __device__ typename NanoNode::type& dstNode(int i) const { + return *(PtrAdd::type>(d_dstPtr, node[LEVEL])+i); + } +};// CudaIndexToGrid::NodeAccessor + +//================================================================================================ + +template +__global__ void cudaProcessGridTreeRoot(typename CudaIndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcGrid = nodeAcc->srcGrid(); + auto &dstGrid = nodeAcc->template dstGrid(); + auto &srcTree = srcGrid.tree(); + auto &dstTree = nodeAcc->template dstTree(); + auto &srcRoot = srcTree.root(); + auto &dstRoot = nodeAcc->template dstRoot(); + + // process Grid 
+ *dstGrid.data() = *srcGrid.data(); + dstGrid.mGridType = mapToGridType(); + dstGrid.mData1 = 0u; + + // process Tree + *dstTree.data() = *srcTree.data(); + dstTree.setRoot(&dstRoot); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + dstTree.setFirstNode(&nodeAcc->template dstNode(0)); + + // process Root + dstRoot.mBBox = srcRoot.mBBox; + dstRoot.mTableSize = srcRoot.mTableSize; + dstRoot.mBackground = srcValues[srcRoot.mBackground]; + if (srcGrid.hasMinMax()) { + dstRoot.mMinimum = srcValues[srcRoot.mMinimum]; + dstRoot.mMaximum = srcValues[srcRoot.mMaximum]; + } + if constexpr(is_same::value) {// e.g. {float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstRoot.mAverage = srcValues[srcRoot.mAverage]; + if (srcGrid.hasStdDeviation()) dstRoot.mStdDevi = srcValues[srcRoot.mStdDevi]; + } +}// cudaProcessGridTreeRoot + +//================================================================================================ + +template +__global__ void cudaProcessRootTiles(typename CudaIndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + const auto tid = blockIdx.x; + + // Process children and tiles + const auto &srcTile = *nodeAcc->srcRoot().tile(tid); + auto &dstTile = *nodeAcc->template dstRoot().tile(tid); + dstTile.key = srcTile.key; + if (srcTile.child) { + dstTile.child = sizeof(NanoRoot) + sizeof(NanoRoot::Tile)*((srcTile.child - sizeof(NanoRoot))/sizeof(NanoRoot::Tile)); + dstTile.value = srcValues[0];// set to background + dstTile.state = false; + } else { + dstTile.child = 0;// i.e. 
no child node + dstTile.value = srcValues[srcTile.value]; + dstTile.state = srcTile.state; + } +}// cudaProcessRootTiles + +//================================================================================================ + +template +__global__ void cudaProcessInternalNodes(typename CudaIndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcNodeT = typename NanoNode::type; + using DstNodeT = typename NanoNode::type; + using SrcChildT = typename SrcNodeT::ChildNodeType; + using DstChildT = typename DstNodeT::ChildNodeType; + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + + auto &srcNode = nodeAcc->template srcNode(blockIdx.x); + auto &dstNode = nodeAcc->template dstNode(blockIdx.x); + + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstNode.mBBox = srcNode.mBBox; + dstNode.mFlags = srcNode.mFlags; + dstNode.mValueMask = srcNode.mValueMask; + dstNode.mChildMask = srcNode.mChildMask; + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstNode.mMinimum = srcValues[srcNode.mMinimum]; + dstNode.mMaximum = srcValues[srcNode.mMaximum]; + } + if constexpr(is_same::value) {// e.g. 
{float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstNode.mAverage = srcValues[srcNode.mAverage]; + if (srcGrid.hasStdDeviation()) dstNode.mStdDevi = srcValues[srcNode.mStdDevi]; + } + } + const uint64_t nodeSkip = nodeAcc->nodeCount[LEVEL] - blockIdx.x, srcOff = sizeof(SrcNodeT)*nodeSkip, dstOff = sizeof(DstNodeT)*nodeSkip;// offset to first node of child type + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + for (int threadIdx_z=0; threadIdx_z +__global__ void cudaProcessLeafNodes(typename CudaIndexToGrid::NodeAccessor *nodeAcc, + const typename BuildToValueMap::type *srcValues) +{ + using SrcValueT = typename BuildToValueMap::type; + using DstStatsT = typename NanoRoot::FloatType; + static_assert(!BuildTraits::is_special, "Invalid destination type!"); + auto &srcLeaf = nodeAcc->template srcNode<0>(blockIdx.x); + auto &dstLeaf = nodeAcc->template dstNode(blockIdx.x); + if (threadIdx.x == 0 && threadIdx.y == 0) { + dstLeaf.mBBoxMin = srcLeaf.mBBoxMin; + for (int i=0; i<3; ++i) dstLeaf.mBBoxDif[i] = srcLeaf.mBBoxDif[i]; + dstLeaf.mFlags = srcLeaf.mFlags; + dstLeaf.mValueMask = srcLeaf.mValueMask; + /// + auto &srcGrid = nodeAcc->srcGrid(); + if (srcGrid.hasMinMax()) { + dstLeaf.mMinimum = srcValues[srcLeaf.getMin()]; + dstLeaf.mMaximum = srcValues[srcLeaf.getMax()]; + } + if constexpr(is_same::value) {// e.g. 
{float,float} or {Vec3f,float} + if (srcGrid.hasAverage()) dstLeaf.mAverage = srcValues[srcLeaf.getAvg()]; + if (srcGrid.hasStdDeviation()) dstLeaf.mStdDevi = srcValues[srcLeaf.getDev()]; + } + } + const int off = blockDim.x*blockDim.y*threadIdx.x + blockDim.x*threadIdx.y; + auto *dst = dstLeaf.mValues + off; + for (int threadIdx_z=0; threadIdx_z +__global__ void cudaCpyNodeCount(const NanoGrid *srcGrid, + typename CudaIndexToGrid::NodeAccessor *nodeAcc) +{ + assert(srcGrid->isSequential()); + nodeAcc->d_srcGrid = srcGrid; + for (int i=0; i<3; ++i) nodeAcc->nodeCount[i] = srcGrid->tree().nodeCount(i); + nodeAcc->nodeCount[3] = srcGrid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +CudaIndexToGrid::CudaIndexToGrid(const SrcGridT *d_srcGrid) +{ + NANOVDB_ASSERT(d_srcGrid); + cudaCheck(cudaMalloc((void**)&mDevNodeAcc, sizeof(NodeAccessor))); + cudaCpyNodeCount<<<1,1>>>(d_srcGrid, mDevNodeAcc); + cudaCheckError(); + cudaCheck(cudaMemcpy(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost));// mNodeAcc = *mDevNodeAcc +} + +//================================================================================================ + +template +template +GridHandle CudaIndexToGrid::getHandle(const typename BuildToValueMap::type *srcValues, + const BufferT &pool) +{ + if (mVerbose) mTimer.start("Initiate buffer"); + auto buffer = this->template getBuffer(pool); + + if (mVerbose) mTimer.restart("Process grid,tree,root"); + cudaProcessGridTreeRoot<<<1, 1>>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process root children and tiles"); + cudaProcessRootTiles<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + cudaCheck(cudaFree(mNodeAcc.d_gridName)); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) 
mTimer.restart("Process lower internal nodes"); + cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process leaf nodes"); + cudaProcessLeafNodes<<>>(mDevNodeAcc, srcValues); + if (mVerbose) mTimer.stop(); + cudaCheckError(); + + return GridHandle(std::move(buffer)); +}// CudaIndexToGrid::getHandle + +//================================================================================================ + +template +template +inline BufferT CudaIndexToGrid::getBuffer(const BufferT &pool) +{ + mNodeAcc.grid = 0;// grid is always stored at the start of the buffer! + mNodeAcc.tree = NanoGrid::memUsage(); // grid ends and tree begins + mNodeAcc.root = mNodeAcc.tree + NanoTree::memUsage(); // tree ends and root node begins + mNodeAcc.node[2] = mNodeAcc.root + NanoRoot::memUsage(mNodeAcc.nodeCount[3]); // root node ends and upper internal nodes begin + mNodeAcc.node[1] = mNodeAcc.node[2] + NanoUpper::memUsage()*mNodeAcc.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mNodeAcc.node[0] = mNodeAcc.node[1] + NanoLower::memUsage()*mNodeAcc.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mNodeAcc.meta = mNodeAcc.node[0] + NanoLeaf::DataType::memUsage()*mNodeAcc.nodeCount[0];// leaf nodes end and blind meta data begins + mNodeAcc.blind = mNodeAcc.meta + 0*sizeof(GridBlindMetaData); // meta data ends and blind data begins + mNodeAcc.size = mNodeAcc.blind;// end of buffer + auto buffer = BufferT::create(mNodeAcc.size, &pool, false); + mNodeAcc.d_dstPtr = buffer.deviceData(); + if (mNodeAcc.d_dstPtr == nullptr) throw std::runtime_error("Failed memory allocation on the device"); + + if (size_t size = mGridName.size()) { + cudaCheck(cudaMalloc((void**)&mNodeAcc.d_gridName, size)); + cudaCheck(cudaMemcpy(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice)); + } else { + mNodeAcc.d_gridName = nullptr; + } + cudaCheck(cudaMemcpy(mDevNodeAcc, &mNodeAcc, 
sizeof(NodeAccessor), cudaMemcpyHostToDevice));// copy NodeAccessor CPU -> GPU + return buffer; +} + +//================================================================================================ + +template +typename enable_if::is_index, GridHandle>::type +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool) +{ + CudaIndexToGrid converter(d_srcGrid); + return converter.template getHandle(d_srcValues, pool); +} + +}// nanovdb namespace + +#endif // NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h new file mode 100644 index 0000000000..280c9b6832 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h @@ -0,0 +1,1046 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#ifndef NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +#include +#include "CudaDeviceBuffer.h" +#include +#include +#include + +/* +Notes: + +Improvements: no limit on domain size, avoid random access in root node, removed offsetInLeafNode array + +make -j testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*CudaPointsToGrid*" --gtest_break_on_failure --gtest_repeat=3 + +4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! +*/ + +namespace nanovdb { + +// Define the type used when the points are encoded as blind data in the output grid +enum class PointType : uint32_t { Disable = 0,// no point information e.g. 
when BuildT != Points + PointID = 1,// linear index of type uint32_t to points + World64 = 2,// Vec3d in world space + World32 = 3,// Vec3f in world space + Grid64 = 4,// Vec3d in grid space + Grid32 = 5,// Vec3f in grid space + Voxel32 = 6,// Vec3f in voxel space + Voxel16 = 7,// Vec3u16 in voxel space + Voxel8 = 8,// Vec3u8 in voxel space + Default = 9,// output matches input, i.e. Vec3d or Vec3f in world space + End =10 }; + +//================================================================================================ + +/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is +/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. +/// @tparam Vec3T Template type of the list of coordinates of points in world space. Should be Vec3f or Vec3d. +/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param d_world List of coordinates of points in world space on the device +/// @param pointCount number of points in the list @c d_world +/// @param voxelSize Size of a voxel in world units used for the output grid +/// @param type Defines the way point information is represented in the output grid (see PointType enum above) +/// Should not be PointType::Disable! +/// @param buffer Instance of the device buffer used for memory allocation +/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, +/// are represented as blind data defined by @c type. 
+template +GridHandle +cudaPointsToGrid(const Vec3T* d_world, + int pointCount, + double voxelSize = 1.0, + PointType type = PointType::Default, + BufferT &buffer = BufferT()); + +//================================================================================================ + +template +GridHandle +cudaPointsToGrid(std::vector> pointSet, const BufferT &buffer = BufferT()); + +//================================================================================================ + +/// @brief Generates a NanoGrid of any type from a list of voxel coordinates on the device. Unlike @c cudaPointsToGrid +/// this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a +/// means to generate a grid that is known to contain the voxels given in the list. +/// @tparam BuildT Template type of the return grid +/// @tparam Vec3T Template type of the list of coordinates of voxels in grid (or index) space. Should be Coord, Vec3f or Vec3d. +/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param d_voxels List of coordinates of voxels in grid (or index) space on the device +/// @param pointCount number of voxels in the list @c d_voxels +/// @param voxelSize Size of a voxel in world units used for the output grid +/// @param buffer Instance of the device buffer used for memory allocation +/// @return Returns a handle with the grid of type NanoGrid +template +GridHandle +cudaVoxelsToGrid(const Vec3T* d_voxels, + int pointCount, + double voxelSize = 1.0, + const BufferT &buffer = BufferT()); + + +//================================================================================================ + +template +GridHandle +cudaVoxelsToGrid(std::vector> pointSet, const BufferT &buffer = BufferT()); + +//================================================================================================ + +#if 0 +// the 
following function is only included for backwards compatibility +template +typename enable_if::is_index, GridHandle>::type +cudaCreateNanoGrid(const Vec3T* d_ijk, size_t pointCount, double voxelSize = 1.0, const BufferT &buffer = BufferT()) +{ + return cudaVoxelsToGrid(d_ijk, pointCount, voxelSize, buffer); +} +#endif + +//================================================================================================ + +template +__hostdev__ inline static void worldToVoxel(Vec3u8 &voxel, const Vec3T &world, const Map &map) +{ + const Vec3d ijk = map.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<8) - 1); + voxel[0] = uint8_t( encode*(ijk[0] - Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint8_t( encode*(ijk[1] - Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint8_t( encode*(ijk[2] - Floor(ijk[2] + 0.5) + 0.5) ); +} + +template +__hostdev__ inline static void worldToVoxel(Vec3u16 &voxel, const Vec3T &world, const Map &map) +{ + const Vec3d ijk = map.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<16) - 1); + voxel[0] = uint16_t( encode*(ijk[0] - Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint16_t( encode*(ijk[1] - Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint16_t( encode*(ijk[2] - Floor(ijk[2] + 0.5) + 0.5) ); +} + +template +__hostdev__ inline static void worldToVoxel(Vec3f &voxel, const Vec3T &world, const Map &map) +{ + const Vec3d ijk = map.applyInverseMap(world);// world -> index + voxel[0] = float( ijk[0] - Floor(ijk[0] + 0.5) ); + voxel[1] = float( ijk[1] - Floor(ijk[1] + 0.5) ); + voxel[2] = float( ijk[2] - Floor(ijk[2] + 0.5) ); +} + +//================================================================================================ + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u8 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<8) - 1); + if constexpr(is_same::value) { + return map.applyMap( Vec3d(ijk[0] + 
decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u16 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<16) - 1); + if constexpr(is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map) +{ + if constexpr(is_same::value) { + return map.applyMap( Vec3d(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } else { + return map.applyMapF(Vec3f(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } +} + +//================================================================================================ + +namespace {// anonymous namespace + +template +class CudaPointsToGrid +{ +public: + + struct Data { + Map map; + void *d_bufferPtr; + uint64_t *d_keys, *d_tile_keys, *d_lower_keys, *d_leaf_keys;// device pointer to 64 bit keys + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size;// byte offsets to nodes in buffer + uint32_t *d_indx;// device pointer to point indices (or IDs) + uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf,1=lower, 2=upper + uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel; + BitFlags<16> flags; + __hostdev__ NanoGrid& getGrid() const {return *PtrAdd>(d_bufferPtr, grid);} + __hostdev__ NanoTree& getTree() const {return *PtrAdd>(d_bufferPtr, tree);} + __hostdev__ NanoRoot& getRoot() const {return *PtrAdd>(d_bufferPtr, root);} + __hostdev__ NanoUpper& 
getUpper(int i) const {return *(PtrAdd>(d_bufferPtr, upper)+i);} + __hostdev__ NanoLower& getLower(int i) const {return *(PtrAdd>(d_bufferPtr, lower)+i);} + __hostdev__ NanoLeaf& getLeaf(int i) const {return *(PtrAdd>(d_bufferPtr, leaf)+i);} + __hostdev__ GridBlindMetaData& getMeta() const { return *PtrAdd(d_bufferPtr, meta);}; + template + __hostdev__ Vec3T& getPoint(int i) const {return *(PtrAdd(d_bufferPtr, blind)+i);} + };// Data + + /// @brief Constructor from a Map + /// @param map Map to be used for the output device grid + CudaPointsToGrid(const Map &map) : mPointType(is_same::value ? PointType::Default : PointType::Disable){ + mData.map = map; + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); + cudaCheck(cudaMalloc((void**)&mDeviceData, sizeof(Data))); + } + + /// @brief Default constructor + /// @param scale Voxel size in world units + /// @param trans Translation of origin in world units + CudaPointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0)) : CudaPointsToGrid(Map(scale, trans)) {} + + /// @brief Destructor + ~CudaPointsToGrid() {cudaCheck(cudaFree(mDeviceData));} + + /// @brief Toggle on and off verbose mode + /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking + void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); } + + /// @brief Toggle on and off the computation of a bounding-box + /// @param on If true bbox will be computed + void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); } + + /// @brief Set the name of the output grid + /// @param name name of the output grid + void setGridName(const std::string &name) {mGridName = name;} + + // only available when BuildT == Points + template typename enable_if::value>::type + setPointType(PointType type) { mPointType = type; } + + /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space + /// @tparam BuildT Build type of the output grid, 
i.e NanoGrid + /// @tparam Vec3T Type of the input points. If Vec3 points are in world space and if Coord in Index space + /// @tparam BufferT Buffer type used for allocation of the grid handle + /// @param d_xyz device point to an array of points in world space + /// @param pointCount number of input points + /// @param gridName optional name of the output grid + /// @param buffer optional buffer (currently ignored) + /// @return returns a handle with a grid of type NanoGrid + //template + template + GridHandle getHandle(const Vec3T* d_xyz, size_t pointCount, const BufferT &buffer = BufferT()); + + template + void countNodes(const Vec3T* d_points, size_t pointCount); + + template + void processGridTreeRoot(const Vec3T *d_points, size_t pointCount); + + void processUpperNodes(); + + void processLowerNodes(); + + template + void processLeafNodes(const Vec3T *d_points); + + template + void processPoints(const Vec3T *d_points, size_t pointCount); + + void processBBox(); + + // the following methods are only defined when BuildT == Points + template typename enable_if::value, uint32_t>::type + maxPointsPerVoxel() const {return mMaxPointsPerVoxel;} + template typename enable_if::value, uint32_t>::type + maxPointsPerLeaf() const {return mMaxPointsPerLeaf;} + +private: + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
+ static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;} + + GpuTimer mTimer; + PointType mPointType; + std::string mGridName; + int mVerbose{0}; + Data mData, *mDeviceData; + uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + // wrapper of cub::CachingDeviceAllocator with a shared scratch space + struct Allocator { + AllocT mAllocator; + void* d_scratch; + size_t scratchSize, actualScratchSize; + Allocator() : d_scratch(nullptr), scratchSize(0), actualScratchSize(0) {} + ~Allocator() { + if (scratchSize > 0) this->free(d_scratch);// a bug in cub makes this necessary + mAllocator.FreeAllCached(); + } + template + T* alloc(size_t count, cudaStream_t stream = 0) { + T* d_ptr = nullptr; + cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream)); + return d_ptr; + } + void free(void *d_ptr) {if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));} + template + void free(void *d_ptr, T... other) { + if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr)); + this->free(other...); + } + void adjustScratch(cudaStream_t stream = 0){ + if (scratchSize > actualScratchSize) { + if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch)); + cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream)); + actualScratchSize = scratchSize; + } + } + } mMemPool; + + template + BufferT getBuffer(const BufferT &buffer, size_t pointCount); +};// CudaPointsToGrid + +//================================================================================================ + +// Define utility macro used to call cub functions that use dynamic temporary storage +#ifndef CALL_CUBS +#ifdef _WIN32 +#define CALL_CUBS(func, ...) \ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__)); \ + mMemPool.adjustScratch(); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__)); +#else// fdef _WIN32 +#define CALL_CUBS(func, args...) 
\ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args)); \ + mMemPool.adjustScratch(); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args)); +#endif// ifdef _WIN32 +#endif// ifndef CALL_CUBS + +}// anonymous namespace + +//================================================================================================ + +template +template +inline GridHandle +CudaPointsToGrid::getHandle(const Vec3T* d_xyz, + size_t pointCount, + const BufferT &pool) +{ + if (mVerbose==1) mTimer.start("\nCounting nodes"); + this->countNodes(d_xyz, pointCount); + + if (mVerbose==1) mTimer.restart("Initiate buffer"); + auto buffer = this->template getBuffer(pool, pointCount); + + if (mVerbose==1) mTimer.restart("Process grid,tree,root"); + this->processGridTreeRoot(d_xyz, pointCount); + + if (mVerbose==1) mTimer.restart("Process upper nodes"); + this->processUpperNodes(); + + if (mVerbose==1) mTimer.restart("Process lower nodes"); + this->processLowerNodes(); + + if (mVerbose==1) mTimer.restart("Process leaf nodes"); + this->processLeafNodes(d_xyz); + + if (mVerbose==1) mTimer.restart("Process points"); + this->processPoints(d_xyz, pointCount); + + if (mVerbose==1) mTimer.restart("Process bbox"); + this->processBBox(); + + cudaDeviceSynchronize();// finish all device tasks + if (mVerbose==1) mTimer.stop(); + + return GridHandle(std::move(buffer)); +}// CudaPointsToGrid::getHandle + +//================================================================================================ + +// --- CUB helpers --- +template +struct ShiftRight +{ + __hostdev__ inline OutT operator()(const InT& v) const {return static_cast(v >> BitCount);} +}; + +template +struct ShiftRightIterator : public cub::TransformInputIterator, InT*> +{ + using BASE = cub::TransformInputIterator, InT*>; + __hostdev__ inline ShiftRightIterator(uint64_t* input_itr) : BASE(input_itr, ShiftRight()) {} +}; + 
+//================================================================================================ + +template +template +void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t pointCount) +{ + mData.d_keys = mMemPool.template alloc(pointCount); + mData.d_indx = mMemPool.template alloc(pointCount);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB + cudaCheck(cudaMemcpy(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice));// copy mData from CPU -> GPU + + if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); + auto *d_keys = mMemPool.template alloc(pointCount); + auto *d_indx = mMemPool.template alloc(pointCount); + + if (mVerbose==2) mTimer.restart("Generate tile keys"); + if constexpr(is_same::value) {// points in world space + if constexpr(is_same::value) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { + d_indx[tid] = uint32_t(tid); + d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); + }, mDeviceData); cudaCheckError(); + } else if constexpr(is_same::value) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { + d_indx[tid] = uint32_t(tid); + d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); + }, mDeviceData); cudaCheckError(); + } else { + throw std::runtime_error("Points (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + } + } else if constexpr(is_same::value) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { + d_indx[tid] = uint32_t(tid); + d_keys[tid] = NanoRoot::CoordToKey(d_points[tid]); + }, mDeviceData); cudaCheckError(); + } else if constexpr(is_same::value || is_same::value) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { + d_indx[tid] = uint32_t(tid); + d_keys[tid] = NanoRoot::CoordToKey(d_points[tid].round()); + }, mDeviceData); cudaCheckError(); + } else { + 
throw std::runtime_error("Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); + } + + if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord + std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx + + if (mVerbose==2) mTimer.restart("Allocate runs"); + auto *d_points_per_tile = mMemPool.template alloc(pointCount); + uint32_t *d_node_count = mMemPool.template alloc(3); + + if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, d_points_per_tile, d_node_count+2, pointCount); + cudaCheck(cudaMemcpy(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2]); + cudaCheck(cudaMemcpy(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + + if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); + uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; + cudaCheck(cudaMemcpy(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost)); + mMemPool.free(d_points_per_tile); + + auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ + return tileID << 36 | // upper offset: 64-15-12-9=28, i.e. last 28 bits + uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits + uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits + uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. 
first 9 bits + }; + + for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { + const uint32_t count = points_per_tile[id]; + cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + tid += offset; + Vec3T p = d_points[d_indx[tid]]; + if constexpr(is_same::value) p = is_same::value ? d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); + d_keys[tid] = voxelKey(id, p.round()); + }, mDeviceData); cudaCheckError(); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 + offset += count; + } + mMemPool.free(d_indx); + delete [] points_per_tile; + + if (mVerbose==2) mTimer.restart("Count points per voxel"); + + mData.pointsPerVoxel = mMemPool.template alloc(pointCount); + uint32_t *d_voxel_count = mMemPool.template alloc(1); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); + cudaCheck(cudaMemcpy(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + mMemPool.free(d_voxel_count); + + if constexpr(is_same::value) { + if (mVerbose==2) mTimer.restart("Count max points per voxel"); + uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(1); + CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); + cudaCheck(cudaMemcpy(&mMaxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + mMemPool.free(d_maxPointsPerVoxel); + } + + //printf("\n Active voxel count = %u, max points per voxel = %u\n", mData.voxelCount, mMaxPointsPerVoxel); + if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); + mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); + + mData.pointsPerLeaf = mMemPool.template alloc(pointCount); + CALL_CUBS(DeviceRunLengthEncode::Encode, 
ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); + cudaCheck(cudaMemcpy(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + + if constexpr(is_same::value) { + uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(1); + CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); + cudaCheck(cudaMemcpy(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); + if (mMaxPointsPerLeaf > std::numeric_limits::max()) { + throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); + } + mMemPool.free(d_maxPointsPerLeaf); + } + + mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0]); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); + + mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0]); + cudaCheck(cudaMemcpy(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + + CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, d_node_count+1, mData.nodeCount[0]);// count lower nodes + cudaCheck(cudaMemcpy(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1]); + cudaCheck(cudaMemcpy(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + + mMemPool.free(d_keys, d_node_count); + if (mVerbose==2) mTimer.stop(); + + //printf("Leaf count = %u, lower count = %u, upper count = %u\n", mData.nodeCount[0], mData.nodeCount[1], mData.nodeCount[2]); +}// CudaPointsToGrid::countNodes + +//================================================================================================ + +template +template +inline BufferT CudaPointsToGrid::getBuffer(const 
BufferT &pool, size_t pointCount) +{ + auto sizeofPoint = [&]()->size_t{ + switch (mPointType){ + case PointType::PointID: return sizeof(uint32_t); + case PointType::World64: return sizeof(Vec3d); + case PointType::World32: return sizeof(Vec3f); + case PointType::Grid64: return sizeof(Vec3d); + case PointType::Grid32: return sizeof(Vec3f); + case PointType::Voxel32: return sizeof(Vec3f); + case PointType::Voxel16: return sizeof(Vec3u16); + case PointType::Voxel8: return sizeof(Vec3u8); + case PointType::Default: return sizeof(Vec3T); + default: return size_t(0);// PointType::Disable + } + }; + + mData.grid = 0;// grid is always stored at the start of the buffer! + mData.tree = NanoGrid::memUsage(); // grid ends and tree begins + mData.root = mData.tree + NanoTree::memUsage(); // tree ends and root node begins + mData.upper = mData.root + NanoRoot::memUsage(mData.nodeCount[2]); // root node ends and upper internal nodes begin + mData.lower = mData.upper + NanoUpper::memUsage()*mData.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mData.leaf = mData.lower + NanoLower::memUsage()*mData.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mData.meta = mData.leaf + NanoLeaf::DataType::memUsage()*mData.nodeCount[0];// leaf nodes end and blind meta data begins + mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins + mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer + + auto buffer = BufferT::create(mData.size, &pool, false); + mData.d_bufferPtr = buffer.deviceData(); + if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); + cudaCheck(cudaMemcpy(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice));// copy Data CPU -> GPU + return buffer; +}// CudaPointsToGrid::getBuffer + +//================================================================================================ + 
+template +template +inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d_points, size_t pointCount) +{ + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { + // process Root + auto &root = d_data->getRoot(); + root.mBBox = CoordBBox(); // init to empty + root.mTableSize = d_data->nodeCount[2]; + root.mBackground = NanoRoot::ValueType(0);// background_value + root.mMinimum = root.mMaximum = NanoRoot::ValueType(0); + root.mAverage = root.mStdDevi = NanoRoot::FloatType(0); + + // process Tree + auto &tree = d_data->getTree(); + tree.setRoot(&root); + tree.setFirstNode(&d_data->getUpper(0)); + tree.setFirstNode(&d_data->getLower(0)); + tree.setFirstNode(&d_data->getLeaf(0)); + tree.mNodeCount[2] = tree.mTileCount[2] = d_data->nodeCount[2]; + tree.mNodeCount[1] = tree.mTileCount[1] = d_data->nodeCount[1]; + tree.mNodeCount[0] = tree.mTileCount[0] = d_data->nodeCount[0]; + tree.mVoxelCount = d_data->voxelCount; + + // process Grid + auto &grid = d_data->getGrid(); + grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, mapToGridType()); + grid.mBlindMetadataCount = is_same::value;// ? 
1u : 0u; + grid.mBlindMetadataOffset = d_data->meta; + if (pointType != PointType::Disable) { + const auto lastLeaf = tree.mNodeCount[0] - 1; + grid.mData1 = d_data->pointsPerLeafPrefix[lastLeaf] + d_data->pointsPerLeaf[lastLeaf]; + auto &meta = d_data->getMeta(); + meta.mDataOffset = sizeof(GridBlindMetaData);// blind data is placed right after this meta data + meta.mValueCount = pointCount; + // Blind meta data + switch (pointType){ + case PointType::PointID: + grid.mGridClass = GridClass::PointIndex; + meta.mSemantic = GridBlindDataSemantic::PointId; + meta.mDataClass = GridBlindDataClass::IndexArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(uint32_t); + cudaStrcpy(meta.mName, "PointID: uint32_t indices to points"); + break; + case PointType::World64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3d); + cudaStrcpy(meta.mName, "World64: Vec3 point coordinates in world space"); + break; + case PointType::World32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3f); + cudaStrcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + break; + case PointType::Grid64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3d); + cudaStrcpy(meta.mName, "Grid64: Vec3 point coordinates in grid space"); + break; + case PointType::Grid32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = 
sizeof(Vec3f); + cudaStrcpy(meta.mName, "Grid32: Vec3 point coordinates in grid space"); + break; + case PointType::Voxel32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3f); + cudaStrcpy(meta.mName, "Voxel32: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel16: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3u16); + cudaStrcpy(meta.mName, "Voxel16: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel8: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3u8); + cudaStrcpy(meta.mName, "Voxel8: Vec3 point coordinates in voxel space"); + break; + case PointType::Default: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = mapToGridType(); + meta.mValueSize = sizeof(Vec3T); + if constexpr(is_same::value) { + cudaStrcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + } else if constexpr(is_same::value){ + cudaStrcpy(meta.mName, "World64: Vec3 point coordinates in world space"); + } else { + printf("Error in CudaPointsToGrid::processGridTreeRoot: expected Vec3T = Vec3f or Vec3d\n"); + } + break; + default: + printf("Error in CudaPointsToGrid::processGridTreeRoot: invalid pointType\n"); + } + } else if constexpr(BuildTraits::is_offindex) { + grid.mData1 = 1u + 512u*d_data->nodeCount[0]; + grid.mGridClass = GridClass::IndexGrid; + } + }, mDeviceData, mPointType);// cudaLambdaKernel + 
cudaCheckError(); + + char *dst = mData.getGrid().mGridName; + if (const char *src = mGridName.data()) { + cudaCheck(cudaMemcpy(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice)); + } else { + cudaCheck(cudaMemset(dst, 0, GridData::MaxNameSize)); + } +}// CudaPointsToGrid::processGridTreeRoot + +//================================================================================================ + +template +inline void CudaPointsToGrid::processUpperNodes() +{ + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + auto &upper = d_data->getUpper(tid); + const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); + root.tile(tid)->setChild(ijk, &upper, &root); + upper.mBBox[0] = ijk; + upper.mFlags = 0; + upper.mValueMask.setOff(); + upper.mChildMask.setOff(); + upper.mMinimum = upper.mMaximum = NanoLower::ValueType(0); + upper.mAverage = upper.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + mMemPool.free(mData.d_tile_keys); + + const uint64_t valueCount = mData.nodeCount[2] << 15; + cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &upper = d_data->getUpper(tid >> 15); + upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// CudaPointsToGrid::processUpperNodes + +//================================================================================================ + +template +inline void CudaPointsToGrid::processLowerNodes() +{ + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + const uint32_t upperOffset = lowerKey & 32767u;// (1 << 15) - 1 = 32767 + upper.mChildMask.setOnAtomic(upperOffset); + auto &lower = d_data->getLower(tid); + upper.setChild(upperOffset, &lower); + lower.mBBox[0] = 
upper.offsetToGlobalCoord(upperOffset); + lower.mFlags = 0; + lower.mValueMask.setOff(); + lower.mChildMask.setOff(); + lower.mMinimum = lower.mMaximum = NanoLower::ValueType(0);// background; + lower.mAverage = lower.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + const uint64_t valueCount = mData.nodeCount[1] << 12; + cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &lower = d_data->getLower(tid >> 12); + lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// CudaPointsToGrid::processLowerNodes + +//================================================================================================ + +template +template +inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_points) +{ + const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats + + if (mVerbose==2) mTimer.start("process leaf meta data"); + // loop over leaf nodes and add it to its parent node + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; + auto &upper = d_data->getUpper(tile_id); + const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; + auto &lower = *upper.getChild(upperOffset); + lower.mChildMask.setOnAtomic(lowerOffset); + auto &leaf = d_data->getLeaf(tid); + lower.setChild(lowerOffset, &leaf); + leaf.mBBoxMin = lower.offsetToGlobalCoord(lowerOffset); + leaf.mFlags = flags; + auto &valueMask = leaf.mValueMask; + valueMask.setOff();// initiate all bits to off + + //for (uint64_t *ptr=d_data->d_keys+d_data->pointsPerLeafPrefix[tid], *end=ptr+d_data->pointsPerLeaf[tid]; ptr!=end; ++ptr) { + // valueMask.setOn(*ptr & uint64_t(511)); + //} + + if constexpr(is_same::value) { + leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; + leaf.mPointCount = d_data->pointsPerLeaf[tid]; + 
} else if constexpr(BuildTraits::is_offindex) { + leaf.mOffset = tid*512u + 1u;// background is index 0 + leaf.mPrefixSum = 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mAverage = leaf.mStdDevi = NanoLeaf::FloatType(0); + leaf.mMinimum = leaf.mMaximum = NanoLeaf::ValueType(0); + } + }, mDeviceData); cudaCheckError(); + + if (mVerbose==2) mTimer.restart("set active voxel state and values"); + // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues + cudaLambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { + const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; + const uint64_t voxelKey = d_data->d_keys[pointID]; + auto &upper = d_data->getUpper(voxelKey >> 36); + auto &lower = *upper.getChild((voxelKey >> 21) & 32767u); + auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); + const uint32_t n = voxelKey & 511u; + leaf.mValueMask.setOnAtomic(n);// <--- slow! + if constexpr(is_same::value) { + leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) + } + }, mDeviceData); cudaCheckError(); + + mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); + + if (mVerbose==2) mTimer.restart("set inactive voxel values"); + //if constexpr(is_same::value) {// set inactive voxel values when BuildT == Points + const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; + cudaLambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid >> 9u); + const uint32_t n = tid & 511u; + if (leaf.mValueMask.isOn(n)) return; + if constexpr(is_same::value) { + const uint32_t m = leaf.mValueMask.findPrev(n - 1); + leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(0);// value of inactive voxels + } + }, mDeviceData); cudaCheckError(); + + if constexpr(BuildTraits::is_onindex) { + if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); + uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0]); + auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0]); + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); + }, mDeviceData); cudaCheckError(); + CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); + mMemPool.free(devValueIndex); + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid); + leaf.mOffset = 1u;// will be re-set below + const uint64_t *w = leaf.mValueMask.words(); + uint64_t &prefixSum = leaf.mPrefixSum, sum = CountOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += CountOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } + if (tid==0) { + d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count + d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; + } else { + leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 + } + }, mDeviceData); cudaCheckError(); + mMemPool.free(devValueIndexPrefix); + } + + if constexpr(BuildTraits::is_indexmask) { + if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid); + leaf.mMask = leaf.mValueMask; + }, mDeviceData); cudaCheckError(); + } + if (mVerbose==2) mTimer.stop(); +}// CudaPointsToGrid::processLeafNodes + 
+//================================================================================================ + +template +template +inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) +{ + mMemPool.free(mData.d_indx); +} + +//================================================================================================ + +// Template specialization with BuildT = Points +template <> +template +inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) +{ + switch (mPointType){ + case PointType::Disable: + throw std::runtime_error("CudaPointsToGrid::processPoints: mPointType == PointType::Disable\n"); + case PointType::PointID: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->d_indx[tid]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World64: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World32: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid64: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMap(d_points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid32: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMapF(d_points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel32: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + }, 
mDeviceData); cudaCheckError(); + break; + case PointType::Voxel16: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel8: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Default: + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + default: + printf("Internal error in CudaPointsToGrid::processPoints\n"); + } + mMemPool.free(mData.d_indx); +}// CudaPointsToGrid::processPoints + +//================================================================================================ + +template +inline void CudaPointsToGrid::processBBox() +{ + if (mData.flags.isMaskOff(GridFlags::HasBBox)) { + mMemPool.free(mData.d_leaf_keys, mData.d_lower_keys); + return; + } + + // reset bbox in lower nodes + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + d_data->getLower(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // update and propagate bbox from leaf -> lower/parent nodes + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid]; + auto &upper = d_data->getUpper(leafKey >> 27); + auto &lower = *upper.getChild((leafKey >> 12) & 32767u); + auto &leaf = d_data->getLeaf(tid); + leaf.updateBBox(); + lower.mBBox.expandAtomic(leaf.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_leaf_keys); + cudaCheckError(); + + // reset bbox in upper nodes + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + 
d_data->getUpper(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // propagate bbox from lower -> upper/parent node + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + auto &lower = d_data->getLower(tid); + upper.mBBox.expandAtomic(lower.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_lower_keys); + cudaCheckError();// terminated like every other call site; do not rely on the macro expanding to a brace block + + // propagate bbox from upper -> root/parent node + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); + }, mDeviceData); + cudaCheckError(); + + // update the world-bbox in the root node + cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t, Data *d_data) { + d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); + }, mDeviceData); + cudaCheckError(); +}// CudaPointsToGrid::processBBox + +//================================================================================================ + +template +GridHandle// Grid with PointType coordinates as blind data +cudaPointsToGrid(const Vec3T* d_xyz, int pointCount, double voxelSize, PointType type, BufferT &buffer) +{ + CudaPointsToGrid converter(voxelSize); + converter.setPointType(type); + return converter.getHandle(d_xyz, pointCount, buffer); +} + +//================================================================================================ + +template +GridHandle// Grid +cudaVoxelsToGrid(const Vec3T* d_ijk, int pointCount, double voxelSize, const BufferT &buffer) +{ + CudaPointsToGrid converter(voxelSize); + return converter.getHandle(d_ijk, pointCount, buffer); +} + +//================================================================================================ + +template +GridHandle +cudaPointsToGrid(std::vector> vec, const BufferT &buffer) +{ + std::vector> handles; + for (auto &p : vec) 
handles.push_back(cudaPointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer)); + return mergeDeviceGrids(handles); +} + +//================================================================================================ + +template +GridHandle +cudaVoxelsToGrid(std::vector> vec, const BufferT &buffer) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(cudaVoxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer)); + return mergeDeviceGrids(handles); +} + +}// nanovdb namespace + +#endif // NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h new file mode 100644 index 0000000000..ec9dd24caf --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h @@ -0,0 +1,190 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CudaSignedFloodFill.h + + \author Ken Museth + + \date May 3, 2023 + + \brief Performs signed flood-fill operation on the hierarchical tree structure on the device + + \todo This tools needs to handle the (extremely) rare case when root node + needs to be modified during the signed flood fill operation. This happens + when the root-table needs to be expanded with tile values (of size 4096^3) + that are completely inside the implicit surface. 
+*/ + +#ifndef NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +namespace nanovdb { + +/// @brief Performs signed flood-fill operation on the hierarchical tree structure on the device +/// @tparam BuildT Build type of the grid to be flood-filled +/// @param d_grid Non-const device pointer to the grid that will be flood-filled +/// @param verbose If true timing information will be printed to the terminal +template +typename enable_if::is_float, void>::type +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose = false); + +namespace {// anonymous namespace + +template +class CudaSignedFloodFill +{ +public: + CudaSignedFloodFill() {} + + /// @brief Toggle on and off verbose mode + /// @param on if true verbose is turned on + void setVerbose(bool on = true) {mVerbose = on;} + + void operator()(NanoGrid *d_grid); + +private: + GpuTimer mTimer; + bool mVerbose{false}; + +};// CudaSignedFloodFill + +//================================================================================================ + +template +__global__ void cudaProcessRootNode(NanoTree *tree) +{ + // auto &root = tree->root(); + /* + using ChildT = typename RootT::ChildNodeType; + // Insert the child nodes into a map sorted according to their origin + std::map nodeKeys; + typename RootT::ChildOnIter it = root.beginChildOn(); + for (; it; ++it) nodeKeys.insert(std::pair(it.getCoord(), &(*it))); + static const Index DIM = RootT::ChildNodeType::DIM; + + // We employ a simple z-scanline algorithm that inserts inactive tiles with + // the inside value if they are sandwiched between inside child nodes only! 
+ typename std::map::const_iterator b = nodeKeys.begin(), e = nodeKeys.end(); + if ( b == e ) return; + for (typename std::map::const_iterator a = b++; b != e; ++a, ++b) { + Coord d = b->first - a->first; // delta of neighboring coordinates + if (d[0]!=0 || d[1]!=0 || d[2]==Int32(DIM)) continue;// not same z-scanline or neighbors + const ValueT fill[] = { a->second->getLastValue(), b->second->getFirstValue() }; + if (!(fill[0] < 0) || !(fill[1] < 0)) continue; // scanline isn't inside + Coord c = a->first + Coord(0u, 0u, DIM); + for (; c[2] != b->first[2]; c[2] += DIM) root.addTile(c, mInside, false); + } + */ + //root.setBackground(mOutside, /*updateChildNodes=*/false); +}// cudaProcessRootNode + +//================================================================================================ + +template +__global__ void cudaProcessInternalNodes(NanoTree *tree, size_t count) +{ + using NodeT = typename NanoNode::type; + const size_t tid = size_t(blockIdx.x) * blockDim.x + threadIdx.x;// 64-bit index: count = nodeCount << 12 (or 15) can exceed the range of int + if (tid >= count) return; + const uint32_t nValue = tid & (NodeT::SIZE - 1u); + auto &node = *(tree->template getFirstNode() + (tid >> (3*NodeT::LOG2DIM))); + const auto &mask = node.childMask(); + if (mask.isOn(nValue)) return;// ignore if child + auto value = tree->background();// initiate to outside value + auto n = mask.template findNext(nValue); + if (n < NodeT::SIZE) { + if (node.getChild(n)->getFirstValue() < 0) value = -value; + } else if ((n = mask.template findPrev(nValue)) < NodeT::SIZE) { + if (node.getChild(n)->getLastValue() < 0) value = -value; + } else if (node.getValue(0)<0) { + value = -value; + } + node.setValue(nValue, value); +}// cudaProcessInternalNodes + +//================================================================================================ + +template +__global__ void cudaProcessLeafNodes(NanoTree *tree, size_t count) +{ + using LeafT = NanoLeaf; + const size_t tid = size_t(blockIdx.x) * blockDim.x + threadIdx.x;// widen before multiplying so the product cannot wrap at 2^32 + if (tid >= count) return; + const uint32_t 
nVoxel = tid & (LeafT::SIZE - 1u); + auto *leaf = tree->getFirstLeaf() + (tid >> (3*LeafT::LOG2DIM)); + const auto &mask = leaf->valueMask(); + if (mask.isOn(nVoxel)) return; + auto *buffer = leaf->mValues; + auto n = mask.template findNext(nVoxel); + if (n == LeafT::SIZE && (n = mask.template findPrev(nVoxel)) == LeafT::SIZE) n = 0u; + buffer[nVoxel] = buffer[n]<0 ? -tree->background() : tree->background(); +}// cudaProcessLeafNodes + +//================================================================================================ + +template +__global__ void cudaCpyNodeCount(NanoGrid *d_grid, uint64_t *d_count) +{ + NANOVDB_ASSERT(d_grid->isSequential()); + for (int i=0; i<3; ++i) *d_count++ = d_grid->tree().nodeCount(i); + *d_count = d_grid->tree().root().tileCount(); +} + +}// anonymous namespace + +//================================================================================================ + +template +void CudaSignedFloodFill::operator()(NanoGrid *d_grid) +{ + static_assert(BuildTraits::is_float, "CudaSignedFloodFill only works on float grids"); + NANOVDB_ASSERT(d_grid); + uint64_t count[4], *d_count = nullptr; + cudaCheck(cudaMalloc((void**)&d_count, 4*sizeof(uint64_t))); + cudaCpyNodeCount<<<1,1>>>(d_grid, d_count); + cudaCheckError(); + cudaCheck(cudaMemcpy(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_count)); + + static const int threadsPerBlock = 128; + auto blocksPerGrid = [&](size_t count){return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + auto *tree = reinterpret_cast*>(d_grid + 1); + + if (mVerbose) mTimer.start("\nProcess leaf nodes"); + cudaProcessLeafNodes<<>>(tree, count[0]<<9); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process lower internal nodes"); + cudaProcessInternalNodes<<>>(tree, count[1]<<12); + cudaCheckError(); + + if (mVerbose) mTimer.restart("Process upper internal nodes"); + cudaProcessInternalNodes<<>>(tree, count[2]<<15); + cudaCheckError(); + + //if 
(mVerbose) mTimer.restart("Process root node"); + //cudaProcessRootNode<<<1, 1>>>(tree); + if (mVerbose) mTimer.stop(); + cudaCheckError(); +}// CudaSignedFloodFill::operator() + +//================================================================================================ + +template +typename enable_if::is_float, void>::type +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose) +{ + CudaSignedFloodFill tmp; + tmp.setVerbose(verbose); + tmp(d_grid); +} + +}// nanovdb namespace + +#endif // NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h new file mode 100644 index 0000000000..b4a377b227 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -0,0 +1,119 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#ifndef NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED + +#include +#include + +//#if defined(DEBUG) || defined(_DEBUG) + static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) + { + if (code != cudaSuccess) { + fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line); + //fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } + } + static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true) + { + if (ptr == nullptr) { + fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line); + if (abort) exit(1); + } else if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) { + fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line); + if (abort) exit(1); + } + } +//#else +// static inline void gpuAssert(cudaError_t, const char*, int, bool = true){} +// static inline void ptrAssert(void*, const char*, const char*, int, bool = true){} +//#endif + +// Convenience function for checking 
CUDA runtime API results +// can be wrapped around any runtime API call. Always active: the debug-only #if guard above is commented out. +#define cudaCheck(ans) \ + { \ + gpuAssert((ans), __FILE__, __LINE__); \ + } + +#define checkPtr(ptr, msg) \ + { \ + ptrAssert((ptr), (msg), __FILE__, __LINE__); \ + } + +#define cudaSync() \ + { \ + cudaCheck(cudaDeviceSynchronize()); \ + } + +#define cudaCheckError() \ + { \ + cudaCheck(cudaGetLastError()); \ + } + +#if defined(__CUDACC__)// the following functions only run on the GPU! + +// --- Wrapper for launching lambda kernels: calls func(tid, args...) for every global thread index tid < numItems +template<typename Func, typename... Args> +__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args) +{ + const size_t tid = size_t(blockIdx.x) * blockDim.x + threadIdx.x;// 64-bit index: numItems is size_t and may exceed the range of int + if (tid >= numItems) return; + func(tid, args...); +} + +/// @brief Copy characters from @c src to @c dst on the device. +/// @param dst pointer to the character array to write to. +/// @param src pointer to the null-terminated character string to copy from. +/// @return pointer to the character array being written to. +/// @note Emulates the behaviour of std::strcpy. +__device__ inline char* cudaStrcpy(char *dst, const char *src) +{ + char *p = dst; + do {*p++ = *src;} while(*src++); + return dst; +} + +/// @brief Appends a copy of the character string pointed to by @c src to +/// the end of the character string pointed to by @c dst on the device. +/// @param dst pointer to the null-terminated byte string to append to. +/// @param src pointer to the null-terminated byte string to copy from. +/// @return pointer to the character array being appended to. +/// @note Emulates the behaviour of std::strcat. +__device__ inline char* cudaStrcat(char *dst, const char *src) +{ + char *p = dst; + while (*p) ++p; + cudaStrcpy(p, src); + return dst; +} + +/// @brief Compares two null-terminated byte strings lexicographically on the device. 
+/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return Negative value if @c lhs appears before @c rhs in lexicographical order. +/// Zero if @c lhs and @c rhs compare equal. Positive value if @c lhs appears +/// after @c rhs in lexicographical order. +__device__ inline int cudaStrcmp(const char *lhs, const char *rhs) +{ + while(*lhs && (*lhs == *rhs)){ + lhs++; + rhs++; + } + return *(const unsigned char*)lhs - *(const unsigned char*)rhs;// zero if lhs == rhs +} + +/// @brief Test if two null-terminated byte strings are the same +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return true if the two c-strings are identical +__device__ inline bool cudaStrEq(const char *lhs, const char *rhs) +{ + return cudaStrcmp(lhs, rhs) == 0; +} + +#endif + +#endif// NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/nanovdb/nanovdb/util/cuda/GpuTimer.h b/nanovdb/nanovdb/util/cuda/GpuTimer.h new file mode 100644 index 0000000000..435dbb1f3e --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/GpuTimer.h @@ -0,0 +1,104 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @file GpuTimer.h +/// +/// @author Ken Museth +/// +/// @brief A simple GPU timing class + +#ifndef NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED + +#include // for std::cerr +#include +#include + +namespace nanovdb { + +class GpuTimer +{ + cudaEvent_t mStart, mStop; + +public: + /// @brief Default constructor + /// @note Starts the timer + GpuTimer(void* stream = nullptr) + { + cudaEventCreate(&mStart); + cudaEventCreate(&mStop); + cudaEventRecord(mStart, reinterpret_cast(stream)); + } + + /// @brief Construct and start the timer + /// @param msg string message to be printed when timer is started + /// @param 
stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + GpuTimer(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + { + os << msg << " ... " << std::flush; + cudaEventCreate(&mStart); + cudaEventCreate(&mStop); + cudaEventRecord(mStart, reinterpret_cast(stream)); + } + + /// @brief Destructor + ~GpuTimer() + { + cudaEventDestroy(mStart); + cudaEventDestroy(mStop); + } + + /// @brief Start the timer + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @note this overload does not print a message + void start(void* stream = nullptr) + { + cudaEventRecord(mStart, reinterpret_cast(stream)); + } + + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + void start(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + { + os << msg << " ... 
" << std::flush; + this->start(stream); + } + + /// @brief elapsed time (since start) in milliseconds + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @return elapsed time (since start) in milliseconds + float elapsed(void* stream = nullptr) + { + cudaEventRecord(mStop, reinterpret_cast(stream)); + cudaEventSynchronize(mStop); + float diff = 0.0f; + cudaEventElapsedTime(&diff, mStart, mStop); + return diff; + } + + /// @brief stop the timer + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + void stop(void* stream = nullptr, std::ostream& os = std::cerr) + { + float diff = this->elapsed(stream); + os << "completed in " << diff << " milliseconds" << std::endl; + } + + /// @brief stop and start the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + /// @warning Remember to call start before restart + void restart(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + { + this->stop(); + this->start(msg, stream, os); + } +};// GpuTimer + +} // namespace nanovdb + +#endif // NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED diff --git a/openvdb_cmd/vdb_tool/include/Tool.h b/openvdb_cmd/vdb_tool/include/Tool.h index c1887a5c3d..b397811a6d 100644 --- a/openvdb_cmd/vdb_tool/include/Tool.h +++ b/openvdb_cmd/vdb_tool/include/Tool.h @@ -52,7 +52,7 @@ #ifdef VDB_TOOL_USE_NANO #include #include -#include +#include #include #endif @@ -1224,6 +1224,7 @@ void Tool::writeNVDB(const std::string &fileName) const float tolerance = mParser.get("tolerance");// negative values means derive it from the grid class (eg ls or fog) const std::string stats = mParser.get("stats"); const std::string checksum = mParser.get("checksum"); + const int verbose = mParser.verbose ? 
1 : 0; nanovdb::io::Codec codec = nanovdb::io::Codec::NONE;// compression codec for the file if (codec_str == "zip") { @@ -1284,35 +1285,24 @@ void Tool::writeNVDB(const std::string &fileName) auto openToNano = [&](const GridBase::Ptr& base) { if (auto floatGrid = GridBase::grid(base)) { + using SrcGridT = openvdb::FloatGrid; switch (qMode){ - case nanovdb::GridType::Fp4: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, mParser.verbose ? 1 : 0); - } case nanovdb::GridType::Fp8: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, mParser.verbose ? 1 : 0); - } case nanovdb::GridType::Fp16: { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - return s(*floatGrid, sMode, cMode, mParser.verbose ? 1 : 0); - } case nanovdb::GridType::FpN: { + case nanovdb::GridType::Fp4: + return nanovdb::createNanoGrid(*floatGrid, sMode, cMode, dither, verbose); + case nanovdb::GridType::Fp8: + return nanovdb::createNanoGrid(*floatGrid, sMode, cMode, dither, verbose); + case nanovdb::GridType::Fp16: + return nanovdb::createNanoGrid(*floatGrid, sMode, cMode, dither, verbose); + case nanovdb::GridType::FpN: if (absolute) { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - s.oracle() = nanovdb::AbsDiff(tolerance); - return s(*floatGrid, sMode, cMode, mParser.verbose ? 1 : 0); + return nanovdb::createNanoGrid(*floatGrid, sMode, cMode, dither, verbose, nanovdb::AbsDiff(tolerance)); } else { - nanovdb::OpenToNanoVDB s; - s.enableDithering(dither); - s.oracle() = nanovdb::RelDiff(tolerance); - return s(*floatGrid, sMode, cMode, mParser.verbose ? 1 : 0); + return nanovdb::createNanoGrid(*floatGrid, sMode, cMode, dither, verbose, nanovdb::RelDiff(tolerance)); } - } default: break;// 32 bit float grids are handled below + default: break;// 32 bit float grids are handled below }// end of switch } - return nanovdb::openToNanoVDB(base, sMode, cMode, mParser.verbose ? 
1 : 0);// float and other grids + return nanovdb::openToNanoVDB(base, sMode, cMode, verbose);// float and other grids };// openToNano if (fileName=="stdout.nvdb") { From 492ea4da5379d46b2fed1472c1acac38e55b3db4 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Mon, 31 Jul 2023 20:24:30 -0700 Subject: [PATCH 02/49] fixed typo Signed-off-by: Ken Museth --- doc/nanovdb/SourceTree.md | 245 +++++++++++++++++---------------- nanovdb/nanovdb/CMakeLists.txt | 2 +- 2 files changed, 130 insertions(+), 117 deletions(-) diff --git a/doc/nanovdb/SourceTree.md b/doc/nanovdb/SourceTree.md index 816e0f23eb..c2bb2f3023 100644 --- a/doc/nanovdb/SourceTree.md +++ b/doc/nanovdb/SourceTree.md @@ -15,120 +15,133 @@ ```bash foo@bar:~$ tree . -└── nanovdb - ├── CMakeLists.txt - ├── cmd - │ ├── CMakeLists.txt - │ ├── convert - │ │ └── nanovdb_convert.cc - │ ├── print - │ │ └── nanovdb_print.cc - │ └── validate - │ └── nanovdb_validate.cc - ├── CNanoVDB.h - ├── docs - │ ├── CMakeLists.txt - │ ├── codingstyle.txt - │ └── doxygen-config - ├── examples - │ ├── benchmark - │ │ ├── BenchKernels_dense.cu - │ │ ├── BenchKernels_nano.cu - │ │ ├── Benchmark.cc - │ │ ├── Benchmark_dense.cc - │ │ ├── Benchmark_nano.cc - │ │ ├── Camera.h - │ │ ├── CMakeLists.txt - │ │ ├── DenseGrid.h - │ │ └── Image.h - │ ├── CMakeLists.txt - │ ├── ex_bump_pool_buffer - │ │ └── bump_pool_buffer.cc - │ ├── ex_collide_level_set - │ │ ├── common.h - │ │ ├── main.cc - │ │ ├── nanovdb.cu - │ │ └── openvdb.cc - │ ├── ex_index_grid_cuda - │ │ ├── index_grid_cuda.cc - │ │ └── index_grid_cuda.cu - │ ├── ex_make_custom_nanovdb - │ │ └── make_custom_nanovdb.cc - │ ├── ex_make_funny_nanovdb - │ │ └── make_funny_nanovdb.cc - │ ├── ex_make_nanovdb_sphere - │ │ └── make_nanovdb_sphere.cc - │ ├── ex_make_typed_grids - │ │ └── make_typed_grids.cc - │ ├── ex_map_pool_buffer - │ │ └── map_pool_buffer.cc - │ ├── ex_modify_nanovdb_thrust - │ │ └── modify_nanovdb_thrust.cu - │ ├── ex_nodemanager_cuda - │ │ ├── nodemanager_cuda.cc - │ │ └── 
nodemanager_cuda.cu - │ ├── ex_openvdb_to_nanovdb - │ │ └── openvdb_to_nanovdb.cc - │ ├── ex_openvdb_to_nanovdb_accessor - │ │ └── openvdb_to_nanovdb_accessor.cc - │ ├── ex_openvdb_to_nanovdb_cuda - │ │ ├── openvdb_to_nanovdb_cuda.cc - │ │ └── openvdb_to_nanovdb_cuda.cu - │ ├── ex_raytrace_fog_volume - │ │ ├── common.h - │ │ ├── main.cc - │ │ ├── nanovdb.cu - │ │ └── openvdb.cc - │ ├── ex_raytrace_level_set - │ │ ├── common.h - │ │ ├── main.cc - │ │ ├── nanovdb.cu - │ │ └── openvdb.cc - │ ├── ex_read_nanovdb_sphere - │ │ └── read_nanovdb_sphere.cc - │ ├── ex_read_nanovdb_sphere_accessor - │ │ └── read_nanovdb_sphere_accessor.cc - │ ├── ex_read_nanovdb_sphere_accessor_cuda - │ │ ├── read_nanovdb_sphere_accessor_cuda.cc - │ │ └── read_nanovdb_sphere_accessor_cuda.cu - │ ├── ex_util - │ │ ├── ComputePrimitives.h - │ │ └── CpuTimer.h - │ ├── ex_vox_to_nanovdb - │ │ ├── vox_to_nanovdb.cc - │ │ └── VoxToNanoVDB.h - │ └── ex_write_nanovdb_grids - │ └── write_nanovdb_grids.cc - ├── NanoVDB.h - ├── PNanoVDB.h - ├── Readme.md - ├── unittest - │ ├── CMakeLists.txt - │ ├── pnanovdb_validate_strides.h - │ ├── TestNanoVDB.cc - │ └── TestOpenVDB.cc - └── util - ├── CSampleFromVoxels.h - ├── CudaDeviceBuffer.h - ├── DitherLUT.h - ├── ForEach.h - ├── GridBuilder.h - ├── GridChecksum.h - ├── GridHandle.h - ├── GridStats.h - ├── GridValidator.h - ├── HDDA.h - ├── HostBuffer.h - ├── IndexGridBuilder.h - ├── Invoke.h - ├── IO.h - ├── NanoToOpenVDB.h - ├── NodeManager.h - ├── OpenToNanoVDB.h - ├── Primitives.h - ├── Range.h - ├── Ray.h - ├── Reduce.h - ├── SampleFromVoxels.h - └── Stencils.h +├── CMakeLists.txt +├── cmd +│ ├── CMakeLists.txt +│ ├── convert +│ │ └── nanovdb_convert.cc +│ ├── print +│ │ └── nanovdb_print.cc +│ └── validate +│ └── nanovdb_validate.cc +├── CNanoVDB.h +├── docs +│ ├── CMakeLists.txt +│ ├── codingstyle.txt +│ └── doxygen-config +├── examples +│ ├── benchmark +│ │ ├── BenchKernels_dense.cu +│ │ ├── BenchKernels_nano.cu +│ │ ├── Benchmark.cc +│ │ ├── 
Benchmark_dense.cc +│ │ ├── Benchmark_nano.cc +│ │ ├── Camera.h +│ │ ├── CMakeLists.txt +│ │ ├── DenseGrid.h +│ │ └── Image.h +│ ├── CMakeLists.txt +│ ├── ex_bump_pool_buffer +│ │ └── bump_pool_buffer.cc +│ ├── ex_collide_level_set +│ │ ├── common.h +│ │ ├── main.cc +│ │ ├── nanovdb.cu +│ │ └── openvdb.cc +│ ├── ex_index_grid_cuda +│ │ ├── index_grid_cuda.cc +│ │ └── index_grid_cuda.cu +│ ├── ex_make_custom_nanovdb +│ │ └── make_custom_nanovdb.cc +│ ├── ex_make_custom_nanovdb_cuda +│ │ ├── make_custom_nanovdb_cuda.cc +│ │ └── make_custom_nanovdb_cuda.cu +│ ├── ex_make_funny_nanovdb +│ │ └── make_funny_nanovdb.cc +│ ├── ex_make_nanovdb_sphere +│ │ └── make_nanovdb_sphere.cc +│ ├── ex_make_typed_grids +│ │ └── make_typed_grids.cc +│ ├── ex_map_pool_buffer +│ │ └── map_pool_buffer.cc +│ ├── ex_modify_nanovdb_thrust +│ │ └── modify_nanovdb_thrust.cu +│ ├── ex_nodemanager_cuda +│ │ ├── nodemanager_cuda.cc +│ │ └── nodemanager_cuda.cu +│ ├── ex_openvdb_to_nanovdb +│ │ └── openvdb_to_nanovdb.cc +│ ├── ex_openvdb_to_nanovdb_accessor +│ │ └── openvdb_to_nanovdb_accessor.cc +│ ├── ex_openvdb_to_nanovdb_cuda +│ │ ├── openvdb_to_nanovdb_cuda.cc +│ │ └── openvdb_to_nanovdb_cuda.cu +│ ├── ex_raytrace_fog_volume +│ │ ├── common.h +│ │ ├── main.cc +│ │ ├── nanovdb.cu +│ │ └── openvdb.cc +│ ├── ex_raytrace_level_set +│ │ ├── common.h +│ │ ├── main.cc +│ │ ├── nanovdb.cu +│ │ └── openvdb.cc +│ ├── ex_read_nanovdb_sphere +│ │ └── read_nanovdb_sphere.cc +│ ├── ex_read_nanovdb_sphere_accessor +│ │ └── read_nanovdb_sphere_accessor.cc +│ ├── ex_read_nanovdb_sphere_accessor_cuda +│ │ ├── read_nanovdb_sphere_accessor_cuda.cc +│ │ └── read_nanovdb_sphere_accessor_cuda.cu +│ ├── ex_util +│ │ └── ComputePrimitives.h +│ ├── ex_voxels_to_grid_cuda +│ │ └── ex_voxels_to_grid_cuda.cu +│ ├── ex_vox_to_nanovdb +│ │ ├── vox_to_nanovdb.cc +│ │ └── VoxToNanoVDB.h +│ └── ex_write_nanovdb_grids +│ └── write_nanovdb_grids.cc +├── NanoVDB.h +├── PNanoVDB.h +├── Readme.md +├── unittest +│ ├── 
CMakeLists.txt +│ ├── pnanovdb_validate_strides.h +│ ├── TestNanoVDB.cc +│ ├── TestNanoVDB.cu +│ └── TestOpenVDB.cc +└── util + ├── CpuTimer.h + ├── CreateNanoGrid.h + ├── CSampleFromVoxels.h + ├── cuda + │ ├── CudaAddBlindData.h + │ ├── CudaDeviceBuffer.h + │ ├── CudaIndexToGrid.h + │ ├── CudaPointsToGrid.h + │ ├── CudaSignedFloodFill.h + │ ├── CudaUtils.h + │ └── GpuTimer.h + ├── DitherLUT.h + ├── ForEach.h + ├── GridBuilder.h + ├── GridChecksum.h + ├── GridHandle.h + ├── GridStats.h + ├── GridValidator.h + ├── HDDA.h + ├── HostBuffer.h + ├── Invoke.h + ├── IO.h + ├── NanoToOpenVDB.h + ├── NodeManager.h + ├── OpenToNanoVDB.h + ├── PrefixSum.h + ├── Primitives.h + ├── Range.h + ├── Ray.h + ├── Reduce.h + ├── SampleFromVoxels.h + └── Stencils.h ``` diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index 2e569bab80..ac7fa423f7 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -170,7 +170,7 @@ set(NANOVDB_INCLUDE_UTILFILES util/CpuTimer.h util/CreateNanoGrid.h util/CSampleFromVoxels.h - util/cuda/CudaAddBlindData.handle + util/cuda/CudaAddBlindData.h util/cuda/CudaDeviceBuffer.h util/cuda/CudaIndexToGrid.handle util/cuda/CudaPointsToGrid.handle From 6f9ab9399757090908960d9ce3861eaa9689b73b Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Mon, 31 Jul 2023 20:32:39 -0700 Subject: [PATCH 03/49] fixed typos Signed-off-by: Ken Museth --- nanovdb/nanovdb/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index ac7fa423f7..18efe44f17 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -172,10 +172,10 @@ set(NANOVDB_INCLUDE_UTILFILES util/CSampleFromVoxels.h util/cuda/CudaAddBlindData.h util/cuda/CudaDeviceBuffer.h - util/cuda/CudaIndexToGrid.handle - util/cuda/CudaPointsToGrid.handle - util/cuda/CudaSignedFloodFill.handle - util/cuda/CudaUtils.handle + util/cuda/CudaIndexToGrid.h + 
util/cuda/CudaPointsToGrid.h + util/cuda/CudaSignedFloodFill.h + util/cuda/CudaUtils.h util/cuda/GpuTimer.h util/DitherLUT.h util/ForEach.h From fa4e20b763ada3ebdad7b05e765902ad23f3301f Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Mon, 31 Jul 2023 20:41:10 -0700 Subject: [PATCH 04/49] removed tabs Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/PrefixSum.h | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nanovdb/nanovdb/util/PrefixSum.h b/nanovdb/nanovdb/util/PrefixSum.h index b08ee11d43..b360579745 100644 --- a/nanovdb/nanovdb/util/PrefixSum.h +++ b/nanovdb/nanovdb/util/PrefixSum.h @@ -10,9 +10,9 @@ \brief Multi-threaded implementations of inclusive prefix sum - \note An exclusive prefix sum is simply an array starting with zero - followed by the elements in the inclusive prefix sum, minus its - last entry which is the sum of all the input elements. + \note An exclusive prefix sum is simply an array starting with zero + followed by the elements in the inclusive prefix sum, minus its + last entry which is the sum of all the input elements. 
*/ #ifndef NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED @@ -44,27 +44,27 @@ template void inclusiveScan(T *array, size_t size, const T &identity, bool threaded, Op op) { #ifndef NANOVDB_USE_TBB - threaded = false; - (void)identity;// avoids compiler warning + threaded = false; + (void)identity;// avoids compiler warning #endif if (threaded) { #ifdef NANOVDB_USE_TBB - using RangeT = tbb::blocked_range; - tbb::parallel_scan(RangeT(0, size), identity, - [&](const RangeT &r, T sum, bool is_final_scan)->T { - T tmp = sum; - for (size_t i = r.begin(); i < r.end(); ++i) { - tmp = op(tmp, array[i]); - if (is_final_scan) array[i] = tmp; - } - return tmp; - },[&](const T &a, const T &b) {return op(a, b);} - ); + using RangeT = tbb::blocked_range; + tbb::parallel_scan(RangeT(0, size), identity, + [&](const RangeT &r, T sum, bool is_final_scan)->T { + T tmp = sum; + for (size_t i = r.begin(); i < r.end(); ++i) { + tmp = op(tmp, array[i]); + if (is_final_scan) array[i] = tmp; + } + return tmp; + },[&](const T &a, const T &b) {return op(a, b);} + ); #endif - } else { // serial inclusive prefix operation - for (size_t i=1; i From 77ec3eb3554855c5886dfa546529b6ddd94f8767 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 1 Aug 2023 09:15:55 -0700 Subject: [PATCH 05/49] fixed Windows compiler error Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h index 280c9b6832..ad984b55a0 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h @@ -406,13 +406,13 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t auto *d_indx = mMemPool.template alloc(pointCount); if (mVerbose==2) mTimer.restart("Generate tile keys"); - if constexpr(is_same::value) {// points in world space - if constexpr(is_same::value) { + if (is_same::value) {// points in 
world space + if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); }, mDeviceData); cudaCheckError(); - } else if constexpr(is_same::value) { + } else if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); @@ -420,12 +420,12 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t } else { throw std::runtime_error("Points (vs voxels) coordinates should be represented as Vec3f or Vec3d"); } - } else if constexpr(is_same::value) { + } else if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid]); }, mDeviceData); cudaCheckError(); - } else if constexpr(is_same::value || is_same::value) { + } else if (is_same::value || is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid].round()); From 7c02a1f678de92a0e5a50b8a8a427a5c993f9fbc Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 2 Aug 2023 16:09:26 -0700 Subject: [PATCH 06/49] added back ex_modify_nanovdb_thrust Signed-off-by: Ken Museth --- nanovdb/nanovdb/examples/CMakeLists.txt | 2 +- openvdb/openvdb/points/AttributeArray.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nanovdb/nanovdb/examples/CMakeLists.txt b/nanovdb/nanovdb/examples/CMakeLists.txt index df4fabb059..1ff1e8fd5e 100644 --- a/nanovdb/nanovdb/examples/CMakeLists.txt +++ b/nanovdb/nanovdb/examples/CMakeLists.txt @@ -102,7 +102,7 @@ nanovdb_example(NAME "ex_read_nanovdb_sphere_accessor_cuda") nanovdb_example(NAME "ex_index_grid_cuda") 
nanovdb_example(NAME "ex_nodemanager_cuda") nanovdb_example(NAME "ex_voxels_to_grid_cuda") -#nanovdb_example(NAME "ex_modify_nanovdb_thrust") +nanovdb_example(NAME "ex_modify_nanovdb_thrust") nanovdb_example(NAME "ex_map_pool_buffer") nanovdb_example(NAME "ex_bump_pool_buffer") nanovdb_example(NAME "ex_collide_level_set") diff --git a/openvdb/openvdb/points/AttributeArray.h b/openvdb/openvdb/points/AttributeArray.h index f363a6d8b1..d00a77b362 100644 --- a/openvdb/openvdb/points/AttributeArray.h +++ b/openvdb/openvdb/points/AttributeArray.h @@ -1864,7 +1864,7 @@ TypedAttributeArray::writeMetadata(std::ostream& os, bool ou uint8_t flags(mFlags); uint8_t serializationFlags(0); Index size(mSize); - Index stride(mStrideOrTotalSize); + Index strideOrTotalSize(mStrideOrTotalSize); bool strideOfOne(this->stride() == 1); bool bloscCompression = io::getDataCompression(os) & io::COMPRESS_BLOSC; @@ -1906,7 +1906,7 @@ TypedAttributeArray::writeMetadata(std::ostream& os, bool ou os.write(reinterpret_cast(&size), sizeof(Index)); // write strided - if (!strideOfOne) os.write(reinterpret_cast(&stride), sizeof(Index)); + if (!strideOfOne) os.write(reinterpret_cast(&strideOrTotalSize), sizeof(Index)); } From 9b1b1d69e37d12e434c2bec2108ed57428ec336b Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 09:09:11 -0700 Subject: [PATCH 07/49] major refactoring and renaming Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 48 +++---- .../examples/benchmark/BenchKernels_dense.cu | 2 +- .../examples/benchmark/BenchKernels_nano.cu | 2 +- ...{Benchmark_dense.cc => Benchmark_dense.cu} | 0 .../{Benchmark_nano.cc => Benchmark_nano.cu} | 0 .../{Benchmark.cc => TestBenchmark.cc} | 101 +------------- .../examples/benchmark/TestBenchmark.cu | 109 +++++++++++++++ .../examples/ex_collide_level_set/openvdb.cc | 2 + .../ex_index_grid_cuda/index_grid_cuda.cc | 44 ------ .../ex_index_grid_cuda/index_grid_cuda.cu | 71 +++++----- .../index_grid_cuda_kernel.cu | 41 ++++++ 
.../make_custom_nanovdb_cuda.cc | 2 + ....cu => make_custom_nanovdb_cuda_kernel.cu} | 3 +- .../ex_nodemanager_cuda/nodemanager_cuda.cc | 2 + ...ger_cuda.cu => nodemanager_cuda_kernel.cu} | 1 + .../openvdb_to_nanovdb_cuda.cc | 2 +- ...a.cu => openvdb_to_nanovdb_cuda_kernel.cu} | 1 + .../examples/ex_raytrace_fog_volume/main.cc | 2 +- .../ex_raytrace_fog_volume/nanovdb.cu | 12 +- .../examples/ex_raytrace_level_set/nanovdb.cu | 14 +- .../read_nanovdb_sphere_accessor_cuda.cc | 40 ------ .../read_nanovdb_sphere_accessor_cuda.cu | 61 +++++---- ...ead_nanovdb_sphere_accessor_cuda_kernel.cu | 36 +++++ .../ex_voxels_to_grid_cuda.cu | 2 +- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 46 +++---- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 126 +++++++++--------- nanovdb/nanovdb/unittest/TestOpenVDB.cc | 2 +- nanovdb/nanovdb/util/CreateNanoGrid.h | 3 +- nanovdb/nanovdb/util/GridBuilder.h | 38 +++--- nanovdb/nanovdb/util/GridHandle.h | 93 +++---------- nanovdb/nanovdb/util/PrefixSum.h | 38 +++--- ...udaAddBlindData.h => CudaAddBlindData.cuh} | 19 ++- nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh | 116 ++++++++++++++++ ...{CudaIndexToGrid.h => CudaIndexToGrid.cuh} | 17 +-- ...udaPointsToGrid.h => CudaPointsToGrid.cuh} | 90 ++++++------- ...nedFloodFill.h => CudaSignedFloodFill.cuh} | 15 ++- nanovdb/nanovdb/util/cuda/CudaUtils.h | 2 +- .../util/cuda/{GpuTimer.h => GpuTimer.cuh} | 18 ++- 38 files changed, 666 insertions(+), 555 deletions(-) rename nanovdb/nanovdb/examples/benchmark/{Benchmark_dense.cc => Benchmark_dense.cu} (100%) rename nanovdb/nanovdb/examples/benchmark/{Benchmark_nano.cc => Benchmark_nano.cu} (100%) rename nanovdb/nanovdb/examples/benchmark/{Benchmark.cc => TestBenchmark.cc} (86%) create mode 100644 nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu delete mode 100644 nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc create mode 100644 nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu rename 
nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/{make_custom_nanovdb_cuda.cu => make_custom_nanovdb_cuda_kernel.cu} (92%) rename nanovdb/nanovdb/examples/ex_nodemanager_cuda/{nodemanager_cuda.cu => nodemanager_cuda_kernel.cu} (90%) rename nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/{openvdb_to_nanovdb_cuda.cu => openvdb_to_nanovdb_cuda_kernel.cu} (90%) delete mode 100644 nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc create mode 100644 nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda_kernel.cu rename nanovdb/nanovdb/util/cuda/{CudaAddBlindData.h => CudaAddBlindData.cuh} (90%) create mode 100644 nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh rename nanovdb/nanovdb/util/cuda/{CudaIndexToGrid.h => CudaIndexToGrid.cuh} (97%) rename nanovdb/nanovdb/util/cuda/{CudaPointsToGrid.h => CudaPointsToGrid.cuh} (94%) rename nanovdb/nanovdb/util/cuda/{CudaSignedFloodFill.h => CudaSignedFloodFill.cuh} (93%) rename nanovdb/nanovdb/util/cuda/{GpuTimer.h => GpuTimer.cuh} (84%) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index fbe81519a5..d6f8b9400d 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -291,9 +291,10 @@ class FpN }; /// @dummy type for indexing points into voxels -class Points +class Point { }; +//using Points = Point;// for backwards compatibility // --------------------------> GridType <------------------------------------ @@ -357,7 +358,7 @@ enum class GridClass : uint32_t { Unknown = 0, Topology = 6, // grid with active states only (no values) VoxelVolume = 7, // volume of geometric cubes, e.g. colors cubes in Minecraft IndexGrid = 8, // grid whose values are offsets, e.g. 
into an external array - TensorGrid = 9, // Index grid specefically indexing learnable tensor features + TensorGrid = 9, // Index grid for indexing learnable tensor features End = 10 }; #ifndef __CUDACC_RTC__ @@ -417,9 +418,9 @@ enum class GridBlindDataSemantic : uint32_t { Unknown = 0, PointRadius = 4, PointVelocity = 5, PointId = 6, - WorldCoords = 7, // 3D coorinates in world space, e.g. (0.056, 0.8, 1,8) - GridCoords = 8, // 3D coorinates in grid space, e.g. (1.2, 4.0, 5.7), aka index-space - VoxelCoords = 9, // 3D coorinates invoxel space, e.g. (0.2, 0.0, 0.7) + WorldCoords = 7, // 3D coordinates in world space, e.g. (0.056, 0.8, 1,8) + GridCoords = 8, // 3D coordinates in grid space, e.g. (1.2, 4.0, 5.7), aka index-space + VoxelCoords = 9, // 3D coordinates in voxel space, e.g. (0.2, 0.0, 0.7) End = 10 }; // --------------------------> is_same <------------------------------------ @@ -473,7 +474,7 @@ struct BuildTraits static constexpr bool is_float = is_floating_point::value; // check if T is a template specialization of LeafData, i.e. 
has T mValues[512] static constexpr bool is_special = is_index || is_Fp || - is_same::value || + is_same::value || is_same::value || is_same::value; }; // BuildTraits @@ -648,7 +649,7 @@ struct BuildToValueMap }; template<> -struct BuildToValueMap +struct BuildToValueMap { using Type = uint64_t; using type = uint64_t; @@ -1892,7 +1893,7 @@ struct FloatTraits // size of empty class in C++ is 1 byte and not }; template<> -struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { using FloatType = double; }; @@ -1945,7 +1946,7 @@ __hostdev__ inline GridType mapToGridType() return GridType::Vec4f; } else if constexpr(is_same::value) { return GridType::Vec4d; - } else if (is_same::value) { + } else if (is_same::value) { return GridType::PointIndex; } else if constexpr(is_same::value) { return GridType::Vec3u8; @@ -1967,7 +1968,7 @@ __hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass:: return GridClass::IndexGrid; } else if (is_same::value) { return GridClass::VoxelVolume; - } else if (is_same::value) { + } else if (is_same::value) { return GridClass::PointIndex; } return defaultClass; @@ -3475,9 +3476,9 @@ class Grid : public GridData /// @brief @brief Return the total number of points indexed by this PointGrid /// - /// @note This method is only defined for PointGrid = NanoGrid + /// @note This method is only defined for PointGrid = NanoGrid template - __hostdev__ typename enable_if::value, const uint64_t&>::type + __hostdev__ typename enable_if::value, const uint64_t&>::type pointCount() const { return DataType::mData1; } /// @brief Return a const reference to the tree @@ -5686,15 +5687,15 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData -// --------------------------> LeafData <------------------------------------ +// --------------------------> LeafData <------------------------------------ template class MaskT, uint32_t LOG2DIM> -struct 
NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData { static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); using ValueType = uint64_t; - using BuildType = Points; + using BuildType = Point; using FloatType = typename FloatTraits::FloatType; using ArrayType = uint16_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; @@ -5751,7 +5752,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +}; // LeafData // --------------------------> LeafNode <------------------------------------ @@ -6257,6 +6258,7 @@ using Vec4dGrid = Grid; using Vec3IGrid = Grid; using MaskGrid = Grid; using BoolGrid = Grid; +using PointGrid = Grid; using IndexGrid = Grid; using OnIndexGrid = Grid; using IndexMaskGrid = Grid; @@ -7420,14 +7422,14 @@ class PointAccessor : public DefaultReadAccessor }; // PointAccessor template -class PointAccessor : public DefaultReadAccessor +class PointAccessor : public DefaultReadAccessor { - using AccT = DefaultReadAccessor; - const NanoGrid& mGrid; + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; const AttT* mData; public: - PointAccessor(const NanoGrid& grid) + PointAccessor(const NanoGrid& grid) : AccT(grid.tree().root()) , mGrid(grid) , mData(grid.template getBlindData(0)) @@ -7441,7 +7443,7 @@ class PointAccessor : public DefaultReadAccessor /// @brief return true if this access was initialized correctly __hostdev__ operator bool() const { return mData != nullptr; } - __hostdev__ const NanoGrid& grid() const { return mGrid; } + __hostdev__ const NanoGrid& grid() const { return mGrid; } /// @brief Return the total number of point in the grid and set the /// iterators to the complete range of points. 
@@ -7469,7 +7471,7 @@ class PointAccessor : public DefaultReadAccessor __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const { if (auto* leaf = this->probeLeaf(ijk)) { - const uint32_t n = NanoLeaf::CoordToOffset(ijk); + const uint32_t n = NanoLeaf::CoordToOffset(ijk); if (leaf->isActive(n)) { begin = mData + leaf->first(n); end = mData + leaf->last(n); @@ -7479,7 +7481,7 @@ class PointAccessor : public DefaultReadAccessor begin = end = nullptr; return 0u; // no leaf or inactive voxel } -}; // PointAccessor +}; // PointAccessor /// @brief Class to access values in channels at a specific voxel location. /// diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu index 9e7f0892d3..6d3544e9f8 100644 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu +++ b/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu @@ -10,7 +10,7 @@ #include "DenseGrid.h" #include // for CUDA memory management -#include +#include #include // for nanovdb::Ray #include // for nanovdb::DDA diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu index 61675707fc..5d8aee5d1f 100644 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu +++ b/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu @@ -12,7 +12,7 @@ #include // for CUDA memory management #include // for nanovdb::Ray #include // for nanovdb::ZeroCrossing -#include +#include #include "Image.h" #include "Camera.h" diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc b/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu similarity index 100% rename from nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cc rename to nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc b/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu similarity index 100% rename 
from nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cc rename to nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark.cc b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc similarity index 86% rename from nanovdb/nanovdb/examples/benchmark/Benchmark.cc rename to nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc index 65436ee886..df3d5b5daf 100644 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark.cc +++ b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc @@ -1,7 +1,7 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file Benchmark.cc +/// @file TestBenchmark.cc /// /// @author Ken Museth /// @@ -18,10 +18,6 @@ #include "DenseGrid.h" -#if defined(NANOVDB_USE_CUDA) -#include -#endif - #if defined(NANOVDB_USE_OPENVDB) #include #include @@ -67,7 +63,7 @@ class Benchmark : public ::testing::Test // Code here will be called immediately after each test (right // before the destructor). } - std::string getEnvVar(const std::string& name, const std::string def = "") const + static std::string getEnvVar(const std::string& name, const std::string def = "") { const char* str = std::getenv(name.c_str()); return str == nullptr ? 
def : std::string(str); @@ -486,9 +482,7 @@ TEST_F(Benchmark, OpenVDB_CPU) //mTimer.stop(); } // loop over angle } // OpenVDB_CPU -#endif - - +#endif// NANOVDB_USE_OPENVDB TEST_F(Benchmark, DenseGrid_CPU) { @@ -578,95 +572,6 @@ TEST_F(Benchmark, DenseGrid_CPU) } // loop over angle } // DenseGrid_CPU -#if defined(NANOVDB_USE_CUDA) - -extern "C" void launch_kernels(const nanovdb::GridHandle&, - nanovdb::ImageHandle&, - const nanovdb::Camera*, - cudaStream_t stream); - -TEST_F(Benchmark, NanoVDB_GPU) -{ - using BufferT = nanovdb::CudaDeviceBuffer; - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CameraT = nanovdb::Camera; - - const std::string image_path = this->getEnvVar("VDB_SCRATCH_PATH", "."); - - // The first CUDA run time call initializes the CUDA sub-system (loads the runtime API) which takes time! - int deviceCount; - cudaGetDeviceCount(&deviceCount); - for (int device = 0; device < deviceCount; ++device) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, device); - printf("Device %d has compute capability %d.%d.\n", - device, - deviceProp.major, - deviceProp.minor); - } - cudaSetDevice(0); - - cudaStream_t stream; - cudaCheck(cudaStreamCreate(&stream)); - -#if defined(NANOVDB_USE_OPENVDB) - auto handle = nanovdb::io::readGrid("data/test.nvdb"); -#else - auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f); -#endif - //auto handle = nanovdb::io::readGrid("data/test.nvdb"); - const auto* grid = handle.grid(); - EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLevelSet()); - EXPECT_FALSE(grid->isFogVolume()); - handle.deviceUpload(stream, false); - EXPECT_TRUE(handle.deviceGrid()); - - std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\"" << std::endl; - - const int width = 1280, height = 720; - const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; - const auto bbox = grid->worldBBox(); - const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); - auto eye = [&lookat, 
&radius](int angle) { - const RealT theta = angle * nanovdb::pi() / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - CameraT *host_camera, *dev_camera; - cudaCheck(cudaMalloc((void**)&dev_camera, sizeof(CameraT))); // un-managed memory on the device - cudaCheck(cudaMallocHost((void**)&host_camera, sizeof(CameraT))); - - nanovdb::ImageHandle imgHandle(width, height); - auto* img = imgHandle.image(); - imgHandle.deviceUpload(stream, false); - - for (int angle = 0; angle < 6; ++angle) { - std::stringstream ss; - ss << "NanoVDB: GPU kernel with " << img->size() << " rays"; - host_camera->update(eye(angle), lookat, up, vfov, aspect); - cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); - mTimer.start(ss.str()); - launch_kernels(handle, imgHandle, dev_camera, stream);// defined in BenchKernels_nano.cu - mTimer.stop(); - - //mTimer.start("Write image to file"); - imgHandle.deviceDownload(stream); - ss.str(""); - ss.clear(); - ss << image_path << "/nanovdb_gpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); - //mTimer.stop(); - - } //frame number angle - - cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFreeHost(host_camera)); - cudaCheck(cudaFree(dev_camera)); -} // NanoVDB_GPU -#endif - - int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu new file mode 100644 index 0000000000..cc96fb9648 --- /dev/null +++ b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu @@ -0,0 +1,109 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @file TestBenchmark.cu +/// +/// @author Ken Museth +/// +/// @brief A simple ray-tracing benchmark test. 
+ +#include // io::readGrid +#include // createLevelSetTorus +#include "Image.h" +#include "Camera.h" +#include +#include + +#include + +extern "C" void launch_kernels(const nanovdb::GridHandle&, + nanovdb::ImageHandle&, + const nanovdb::Camera*, + cudaStream_t stream); + +std::string getEnvVar(const std::string& name, const std::string def = "") +{ + const char* str = std::getenv(name.c_str()); + return str == nullptr ? def : std::string(str); +} + +TEST(TestBenchmark, NanoVDB_GPU) +{ + using BufferT = nanovdb::CudaDeviceBuffer; + using RealT = float; + using Vec3T = nanovdb::Vec3; + using CameraT = nanovdb::Camera; + nanovdb::CpuTimer timer; + + const std::string image_path = getEnvVar("VDB_SCRATCH_PATH", "."); + + // The first CUDA run time call initializes the CUDA sub-system (loads the runtime API) which takes time! + int deviceCount; + cudaGetDeviceCount(&deviceCount); + for (int device = 0; device < deviceCount; ++device) { + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, device); + printf("Device %d has compute capability %d.%d.\n", + device, + deviceProp.major, + deviceProp.minor); + } + cudaSetDevice(0); + + cudaStream_t stream; + cudaCheck(cudaStreamCreate(&stream)); + +#if defined(NANOVDB_USE_OPENVDB) + auto handle = nanovdb::io::readGrid("data/test.nvdb"); +#else + auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f); +#endif + //auto handle = nanovdb::io::readGrid("data/test.nvdb"); + const auto* grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_TRUE(grid->isLevelSet()); + EXPECT_FALSE(grid->isFogVolume()); + handle.deviceUpload(stream, false); + EXPECT_TRUE(handle.deviceGrid()); + + std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\"" << std::endl; + + const int width = 1280, height = 720; + const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; + const auto bbox = grid->worldBBox(); + const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); + auto eye = [&lookat, 
&radius](int angle) { + const RealT theta = angle * nanovdb::pi() / 180.0f; + return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); + }; + CameraT *host_camera, *dev_camera; + cudaCheck(cudaMalloc((void**)&dev_camera, sizeof(CameraT))); // un-managed memory on the device + cudaCheck(cudaMallocHost((void**)&host_camera, sizeof(CameraT))); + + nanovdb::ImageHandle imgHandle(width, height); + auto* img = imgHandle.image(); + imgHandle.deviceUpload(stream, false); + + for (int angle = 0; angle < 6; ++angle) { + std::stringstream ss; + ss << "NanoVDB: GPU kernel with " << img->size() << " rays"; + host_camera->update(eye(angle), lookat, up, vfov, aspect); + cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); + timer.start(ss.str()); + launch_kernels(handle, imgHandle, dev_camera, stream);// defined in BenchKernels_nano.cu + timer.stop(); + + //timer.start("Write image to file"); + imgHandle.deviceDownload(stream); + ss.str(""); + ss.clear(); + ss << image_path << "/nanovdb_gpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; + img->writePPM(ss.str(), "Benchmark test"); + //timer.stop(); + + } //frame number angle + + cudaCheck(cudaStreamDestroy(stream)); + cudaCheck(cudaFreeHost(host_camera)); + cudaCheck(cudaFree(dev_camera)); +} // NanoVDB_GPU \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc index aed2bc3f0a..ec67f754bd 100644 --- a/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc +++ b/nanovdb/nanovdb/examples/ex_collide_level_set/openvdb.cc @@ -21,6 +21,8 @@ using BufferT = nanovdb::CudaDeviceBuffer; using BufferT = nanovdb::HostBuffer; #endif +openvdb::GridBase::Ptr nanoToOpenVDB(nanovdb::GridHandle& handle); + void runOpenVDB(nanovdb::GridHandle& handle, int numIterations, int numPoints, BufferT& positionBuffer, BufferT& velocityBuffer) { using GridT = openvdb::FloatGrid; diff --git 
a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc deleted file mode 100644 index 42d93c725b..0000000000 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -#include -#include // for nanovdb::createLevelSetSphere -#include // for nanovdb::CudaDeviceBuffer - -extern "C" void launch_kernels(const nanovdb::NanoGrid*,// device grid - const nanovdb::NanoGrid*,// host grid - cudaStream_t stream); - -/// @brief This examples depends on NanoVDB and CUDA. -int main() -{ - using SrcGridT = nanovdb::FloatGrid; - using DstBuildT = nanovdb::ValueOnIndex; - using BufferT = nanovdb::CudaDeviceBuffer; - try { - // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. - auto srcHandle = nanovdb::createLevelSetSphere(); - auto *srcGrid = srcHandle.grid(); - - // Converts the FloatGrid to an IndexGrid using CUDA for memory management. - auto idxHandle = nanovdb::createNanoGrid(*srcGrid, 1u, false , false);// 1 channel, no tiles or stats - - cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
- cudaStreamCreate(&stream); - - idxHandle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously - auto* cpuGrid = idxHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU - auto* gpuGrid = idxHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU - - if (!gpuGrid) throw std::runtime_error("GridHandle did not contain a device grid with value type float"); - if (!cpuGrid) throw std::runtime_error("GridHandle did not contain a host grid with value type float"); - - launch_kernels(cpuGrid, cpuGrid, stream); // Call a host method to print a grid value on both the CPU and GPU - - cudaStreamDestroy(stream); // Destroy the CUDA stream - } - catch (const std::exception& e) { - std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; - } - return 0; -} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu index be83ceb074..b81d71c22b 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu @@ -1,41 +1,44 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include // for printf +#include +#include // for nanovdb::createLevelSetSphere +#include // for nanovdb::CudaDeviceBuffer -// This is called by the host only -void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) -{ - nanovdb::ChannelAccessor acc(*cpuGrid); - //printf("\nNanoVDB CPU: channels=%u values=%lu\n", acc.grid().blindDataCount(), acc.root().maximum()); - printf("NanoVDB CPU; %lu\n", acc.idx( 0, 0, 0)); - printf("NanoVDB CPU; %lu\n", acc.idx( 99, 0, 0)); - printf("NanoVDB CPU; %lu\n", acc.idx(100, 0, 0)); - printf("NanoVDB CPU; %4.2f\n", acc( 0, 0, 0)); - 
printf("NanoVDB CPU; %4.2f\n", acc( 99, 0, 0)); - printf("NanoVDB CPU; %4.2f\n", acc(100, 0, 0)); -} - -// This is called by the device only -__global__ void gpu_kernel(const nanovdb::NanoGrid* gpuGrid) -{ - nanovdb::ChannelAccessor acc(*gpuGrid); - //printf("\nNanoVDB GPU: channels=%u values=%lu\n", gpuGrid->blindDataCount(), acc.root().maximum()); - printf("NanoVDB GPU; %lu\n", acc.idx( 0, 0, 0)); - printf("NanoVDB GPU; %lu\n", acc.idx( 99, 0, 0)); - printf("NanoVDB GPU; %lu\n", acc.idx(100, 0, 0)); - printf("NanoVDB GPU; %4.2f\n", acc( 0, 0, 0)); - printf("NanoVDB GPU; %4.2f\n", acc( 99, 0, 0)); - printf("NanoVDB GPU; %4.2f\n", acc(100, 0, 0)); -} - -// This is called by the client code on the host -extern "C" void launch_kernels(const nanovdb::NanoGrid* gpuGrid, - const nanovdb::NanoGrid* cpuGrid, - cudaStream_t stream) +extern "C" void launch_kernels(const nanovdb::NanoGrid*,// device grid + const nanovdb::NanoGrid*,// host grid + cudaStream_t stream); + +/// @brief This examples depends on NanoVDB and CUDA. +int main(int, char**) { - gpu_kernel<<<1, 1, 0, stream>>>(gpuGrid); // Launch the device kernel asynchronously + using SrcGridT = nanovdb::FloatGrid; + using DstBuildT = nanovdb::ValueOnIndex; + using BufferT = nanovdb::CudaDeviceBuffer; + try { + // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. + auto srcHandle = nanovdb::createLevelSetSphere(); + auto *srcGrid = srcHandle.grid(); + + // Converts the FloatGrid to an IndexGrid using CUDA for memory management. + auto idxHandle = nanovdb::createNanoGrid(*srcGrid, 1u, false , false);// 1 channel, no tiles or stats + + cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
+ cudaStreamCreate(&stream); + + idxHandle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously + auto* cpuGrid = idxHandle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU + auto* gpuGrid = idxHandle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU + + if (!gpuGrid) throw std::runtime_error("GridHandle did not contain a device grid with value type float"); + if (!cpuGrid) throw std::runtime_error("GridHandle did not contain a host grid with value type float"); + + launch_kernels(cpuGrid, cpuGrid, stream); // Call a host method to print a grid value on both the CPU and GPU - cpu_kernel(cpuGrid); // Launch the host "kernel" (synchronously) + cudaStreamDestroy(stream); // Destroy the CUDA stream + } + catch (const std::exception& e) { + std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; + } + return 0; } \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu new file mode 100644 index 0000000000..be83ceb074 --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu @@ -0,0 +1,41 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // for printf + +// This is called by the host only +void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) +{ + nanovdb::ChannelAccessor acc(*cpuGrid); + //printf("\nNanoVDB CPU: channels=%u values=%lu\n", acc.grid().blindDataCount(), acc.root().maximum()); + printf("NanoVDB CPU; %lu\n", acc.idx( 0, 0, 0)); + printf("NanoVDB CPU; %lu\n", acc.idx( 99, 0, 0)); + printf("NanoVDB CPU; %lu\n", acc.idx(100, 0, 0)); + printf("NanoVDB CPU; %4.2f\n", acc( 0, 0, 0)); + printf("NanoVDB CPU; %4.2f\n", acc( 99, 0, 0)); + 
printf("NanoVDB CPU; %4.2f\n", acc(100, 0, 0)); +} + +// This is called by the device only +__global__ void gpu_kernel(const nanovdb::NanoGrid* gpuGrid) +{ + nanovdb::ChannelAccessor acc(*gpuGrid); + //printf("\nNanoVDB GPU: channels=%u values=%lu\n", gpuGrid->blindDataCount(), acc.root().maximum()); + printf("NanoVDB GPU; %lu\n", acc.idx( 0, 0, 0)); + printf("NanoVDB GPU; %lu\n", acc.idx( 99, 0, 0)); + printf("NanoVDB GPU; %lu\n", acc.idx(100, 0, 0)); + printf("NanoVDB GPU; %4.2f\n", acc( 0, 0, 0)); + printf("NanoVDB GPU; %4.2f\n", acc( 99, 0, 0)); + printf("NanoVDB GPU; %4.2f\n", acc(100, 0, 0)); +} + +// This is called by the client code on the host +extern "C" void launch_kernels(const nanovdb::NanoGrid* gpuGrid, + const nanovdb::NanoGrid* cpuGrid, + cudaStream_t stream) +{ + gpu_kernel<<<1, 1, 0, stream>>>(gpuGrid); // Launch the device kernel asynchronously + + cpu_kernel(cpuGrid); // Launch the host "kernel" (synchronously) +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc index 767026a167..7b4da85f0a 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cc @@ -1,6 +1,8 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 +#undef NANOVDB_USE_OPENVDB // Prevents include/openvdb/points/AttributeArray.h:1841:25: error: ‘stride’ cannot be used as a function + #include #include #include diff --git a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu similarity index 92% rename from nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu rename to 
nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu index 335d0b0631..ae3556ad7a 100644 --- a/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_make_custom_nanovdb_cuda/make_custom_nanovdb_cuda_kernel.cu @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // required since GridHandle has device code #include // for printf // This is called by the host only @@ -32,4 +33,4 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid* deviceGrid, // Launch the host "kernel" (synchronously) cpu_kernel(cpuGrid); -} \ No newline at end of file +} diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc index 7d668a48c0..56021ea80c 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc @@ -10,6 +10,8 @@ extern "C" void launch_kernels(const nanovdb::NodeManager*, const nanovdb::NodeManager*, cudaStream_t stream); +openvdb::FloatGrid::Ptr createLevelSetSphere(); + /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. 
int main() { diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cu b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu similarity index 90% rename from nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cu rename to nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu index 0dd65c9008..97d8703a13 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu @@ -3,6 +3,7 @@ #include // this defined the core tree data structure of NanoVDB accessable on both the host and device #include +#include // required since GridHandle has device code #include // for printf // This is called by the host only diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc index 6cb9f5b4d7..ae4d435dfc 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cc @@ -10,7 +10,7 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid*, cudaStream_t stream); /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. 
-int main() +int main(int, char**) { using SrcGridT = openvdb::FloatGrid; try { diff --git a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cu b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu similarity index 90% rename from nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cu rename to nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu index 321f89fe64..543b0e3027 100644 --- a/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_openvdb_to_nanovdb_cuda/openvdb_to_nanovdb_cuda_kernel.cu @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // required since GridHandle has device code #include // for printf // This is called by the host only diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc index fad142657c..29752239f1 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/main.cc @@ -5,9 +5,9 @@ #include #include #include -#include #if defined(NANOVDB_USE_CUDA) +#include using BufferT = nanovdb::CudaDeviceBuffer; #else using BufferT = nanovdb::HostBuffer; diff --git a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu index 1af67e3c88..c65dfff85a 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_fog_volume/nanovdb.cu @@ -5,17 +5,17 @@ #include #include -#include -#include -#include - -#include "common.h" - #if defined(NANOVDB_USE_CUDA) +#include using BufferT = nanovdb::CudaDeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif +#include +#include +#include + +#include "common.h" void 
runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int width, int height, BufferT& imageBuffer) { diff --git a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu index 53f7bd83a5..14c8bd678d 100644 --- a/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu +++ b/nanovdb/nanovdb/examples/ex_raytrace_level_set/nanovdb.cu @@ -5,18 +5,18 @@ #include #include -#include -#include -#include -#include - -#include "common.h" - #if defined(NANOVDB_USE_CUDA) +#include using BufferT = nanovdb::CudaDeviceBuffer; #else using BufferT = nanovdb::HostBuffer; #endif +#include +#include +#include +#include + +#include "common.h" void runNanoVDB(nanovdb::GridHandle& handle, int numIterations, int width, int height, BufferT& imageBuffer) { diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc deleted file mode 100644 index f1fe52e5df..0000000000 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -#include // this is required to read (and write) NanoVDB files on the host -#include // required for CUDA memory management - -extern "C" void launch_kernels(const nanovdb::NanoGrid*, - const nanovdb::NanoGrid*, - cudaStream_t stream); - -/// @brief Read a NanoVDB grid from a file and print out multiple values on both the cpu and gpu. -/// -/// @note Note This example does NOT depend on OpenVDB, only NanoVDB and CUDA. -int main() -{ - try { - // returns a GridHandle using CUDA for memory management. - auto handle = nanovdb::io::readGrid("data/sphere.nvdb"); - - cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
- cudaStreamCreate(&stream); - - handle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously - - auto* cpuGrid = handle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU - auto* deviceGrid = handle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU - - if (!deviceGrid || !cpuGrid) - throw std::runtime_error("GridHandle did not contain a grid with value type float"); - - launch_kernels(deviceGrid, cpuGrid, stream); // Call a host method to print a grid values on both the CPU and GPU - - cudaStreamDestroy(stream); // Destroy the CUDA stream - } - catch (const std::exception& e) { - std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu index 31301e2d17..4343e01420 100644 --- a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda.cu @@ -1,36 +1,41 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include // this defined the core tree data structure of NanoVDB accessable on both the host and device -#include // for printf +#include // this is required to read (and write) NanoVDB files on the host +#include // required for CUDA memory management +#include -// This is called by the host only -void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) -{ - auto cpuAcc = cpuGrid->getAccessor(); - for (int i = 97; i < 104; ++i) { - printf("(%3i,0,0) NanoVDB cpu: % -4.2f\n", i, cpuAcc.getValue(nanovdb::Coord(i, 0, 0))); - } -} +extern "C" void launch_kernels(const nanovdb::NanoGrid*, + const nanovdb::NanoGrid*, + cudaStream_t 
stream); -// This is called by the device only -__global__ void gpu_kernel(const nanovdb::NanoGrid* deviceGrid) -{ - if (threadIdx.x > 6) - return; - int i = 97 + threadIdx.x; - auto gpuAcc = deviceGrid->getAccessor(); - printf("(%3i,0,0) NanoVDB gpu: % -4.2f\n", i, gpuAcc.getValue(nanovdb::Coord(i, 0, 0))); -} - -// This is called by the client code on the host -extern "C" void launch_kernels(const nanovdb::NanoGrid* deviceGrid, - const nanovdb::NanoGrid* cpuGrid, - cudaStream_t stream) +/// @brief Read a NanoVDB grid from a file and print out multiple values on both the cpu and gpu. +/// +/// @note Note This example does NOT depend on OpenVDB, only NanoVDB and CUDA. +int main(int, char**) { - // Launch the device kernel asynchronously - gpu_kernel<<<1, 64, 0, stream>>>(deviceGrid); + try { + // returns a GridHandle using CUDA for memory management. + auto handle = nanovdb::io::readGrid("data/sphere.nvdb"); + + cudaStream_t stream; // Create a CUDA stream to allow for asynchronous copy of pinned CUDA memory. 
+ cudaStreamCreate(&stream); + + handle.deviceUpload(stream, false); // Copy the NanoVDB grid to the GPU asynchronously + + auto* cpuGrid = handle.grid(); // get a (raw) pointer to a NanoVDB grid of value type float on the CPU + auto* deviceGrid = handle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU + + if (!deviceGrid || !cpuGrid) + throw std::runtime_error("GridHandle did not contain a grid with value type float"); + + launch_kernels(deviceGrid, cpuGrid, stream); // Call a host method to print a grid values on both the CPU and GPU + + cudaStreamDestroy(stream); // Destroy the CUDA stream + } + catch (const std::exception& e) { + std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; + } - // Launch the host "kernel" (synchronously) - cpu_kernel(cpuGrid); + return 0; } \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda_kernel.cu new file mode 100644 index 0000000000..31301e2d17 --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_read_nanovdb_sphere_accessor_cuda/read_nanovdb_sphere_accessor_cuda_kernel.cu @@ -0,0 +1,36 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +#include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // for printf + +// This is called by the host only +void cpu_kernel(const nanovdb::NanoGrid* cpuGrid) +{ + auto cpuAcc = cpuGrid->getAccessor(); + for (int i = 97; i < 104; ++i) { + printf("(%3i,0,0) NanoVDB cpu: % -4.2f\n", i, cpuAcc.getValue(nanovdb::Coord(i, 0, 0))); + } +} + +// This is called by the device only +__global__ void gpu_kernel(const nanovdb::NanoGrid* deviceGrid) +{ + if (threadIdx.x > 6) + return; + int i = 97 + threadIdx.x; + auto gpuAcc = deviceGrid->getAccessor(); + 
printf("(%3i,0,0) NanoVDB gpu: % -4.2f\n", i, gpuAcc.getValue(nanovdb::Coord(i, 0, 0))); +} + +// This is called by the client code on the host +extern "C" void launch_kernels(const nanovdb::NanoGrid* deviceGrid, + const nanovdb::NanoGrid* cpuGrid, + cudaStream_t stream) +{ + // Launch the device kernel asynchronously + gpu_kernel<<<1, 64, 0, stream>>>(deviceGrid); + + // Launch the host "kernel" (synchronously) + cpu_kernel(cpuGrid); +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu index 44fb25407a..2c34a53aee 100644 --- a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu @@ -1,7 +1,7 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#include +#include /// @brief Demonstrates how to create a NanoVDB grid from voxel coordinates on the GPU int main() diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 9d68937d3c..d0cb5a5cb6 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -169,7 +169,7 @@ using MyTypes = ::testing::Types(int &offset) } template<> -void checkLeaf(int &offset) +void checkLeaf(int &offset) { - using DataT = typename nanovdb::LeafNode::DataType; + using DataT = typename nanovdb::LeafNode::DataType; EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mOffset), offset); offset += sizeof(uint64_t); EXPECT_EQ(NANOVDB_OFFSETOF(DataT, mPointCount), offset); @@ -4772,11 +4772,11 @@ void validateLeaf(pnanovdb_grid_type_t grid_type) EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mMask), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8 + 8); } -// template specializations for nanovdb::Points types +// template specializations for nanovdb::Point types template <> -void validateLeaf(pnanovdb_grid_type_t grid_type) +void 
validateLeaf(pnanovdb_grid_type_t grid_type) { - using leaf_t = typename nanovdb::LeafNode; + using leaf_t = typename nanovdb::LeafNode; EXPECT_EQ(sizeof(leaf_t), (pnanovdb_grid_type_constants[grid_type].leaf_size)); EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mOffset), PNANOVDB_LEAF_OFF_VALUE_MASK + 64); EXPECT_EQ(NANOVDB_OFFSETOF(leaf_t, mPointCount), PNANOVDB_LEAF_OFF_VALUE_MASK + 64 + 8); @@ -4823,7 +4823,7 @@ TYPED_TEST(TestOffsets, PNanoVDB) grid_type = PNANOVDB_GRID_TYPE_FP16; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_FPN; - } else if (std::is_same::value) { + } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_POINTINDEX; } else if (std::is_same::value) { grid_type = PNANOVDB_GRID_TYPE_VEC3U8; @@ -7427,22 +7427,22 @@ TEST_F(TestNanoVDB, BuildTree) nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(511)); nanovdb::build::Grid grid1(false), grid2(false); { - mTimer.start("Serial build::Tree"); + //mTimer.start("Serial build::Tree"); auto kernel = [&](const nanovdb::CoordBBox& bbox) { auto acc = grid1.getAccessor(); for (auto it = bbox.begin(); it; ++it) acc.setValueOn(*it); }; kernel(bbox); - mTimer.stop(); + //mTimer.stop(); } { - mTimer.start("Parallel build::Tree"); + //mTimer.start("Parallel build::Tree"); auto kernel = [&](const nanovdb::CoordBBox& bbox) { auto acc = grid2.getWriteAccessor(); for (auto it = bbox.begin(); it; ++it) acc.setValueOn(*it); }; nanovdb::forEach(bbox, kernel); - mTimer.stop(); + //mTimer.stop(); } { auto acc1 = grid1.getAccessor(), acc2 = grid2.getAccessor(); @@ -7657,7 +7657,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) size_t size1 = 0, size2 = 0; std::vector> handles1, handles2; std::vector gridNames; - nanovdb::CpuTimer timer("create 5 host grids"); + //nanovdb::CpuTimer timer("create 5 host grids"); for (int radius = 100; radius<150; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, @@ 
-7667,7 +7667,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) } EXPECT_EQ(5u, gridNames.size()); EXPECT_EQ(5u, handles1.size()); - timer.restart("create 5 host grids"); + //timer.restart("create 5 host grids"); for (int radius = 150; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, @@ -7676,7 +7676,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) } EXPECT_EQ(10u, gridNames.size()); EXPECT_EQ( 5u, handles2.size()); - timer.restart("merging 5 host grids"); + //timer.restart("merging 5 host grids"); auto mergedHandle = nanovdb::mergeGrids(handles2);// merge last 5 grid handles EXPECT_EQ(size2, mergedHandle.size()); EXPECT_FALSE(mergedHandle.isPadded()); @@ -7686,7 +7686,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) EXPECT_EQ(5u, gridData->mGridCount); EXPECT_EQ(0u, gridData->mGridIndex); EXPECT_EQ(handles2[0].size(), gridData->mGridSize); - timer.restart("unit-test host grids"); + //timer.restart("unit-test host grids"); for (int i=0; i<5; ++i){ gridData = mergedHandle.gridData(i); EXPECT_TRUE(gridData); @@ -7715,7 +7715,7 @@ TEST_F(TestNanoVDB, mergeSplitGrids) auto& handle = handles3[5]; EXPECT_EQ(5u, handle.gridCount()); - timer.restart("merging 10 host grids"); + //timer.restart("merging 10 host grids"); mergedHandle = nanovdb::mergeGrids(handles1); EXPECT_EQ(size1 + size2, mergedHandle.size()); EXPECT_TRUE(mergedHandle.data()); @@ -7725,9 +7725,9 @@ TEST_F(TestNanoVDB, mergeSplitGrids) EXPECT_EQ( 0u, gridData->mGridIndex); EXPECT_EQ(handles1[0].size(), gridData->mGridSize); - timer.restart("splitting host grids"); + //timer.restart("splitting host grids"); auto splitHandles = nanovdb::splitGrids(mergedHandle); - timer.restart("unit-test split grids"); + //timer.restart("unit-test split grids"); EXPECT_EQ(10u, splitHandles.size()); for (int i=0; i<5; ++i){ EXPECT_EQ(handles1[i].size(), splitHandles[i].size()); @@ -7743,7 +7743,7 @@ TEST_F(TestNanoVDB, 
mergeSplitGrids) EXPECT_EQ(1u, gridData->mGridCount); EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); } - timer.stop(); + //timer.stop(); }// mergeSplitGrids int main(int argc, char** argv) diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu index 524649b940..32ca28c211 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cu +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -8,10 +8,10 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -501,7 +501,7 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_ValueOnIndexMask) TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) { using BuildT = nanovdb::ValueOnIndex; - nanovdb::CpuTimer timer; + //nanovdb::CpuTimer timer; const size_t voxelCount = 1 << 20;// 1048576 std::vector voxels; {//generate random voxels @@ -509,22 +509,22 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) std::srand(98765); const int max = 512, min = -max; auto op = [&](){return rand() % (max - min) + min;}; - timer.start("Creating "+std::to_string(voxelCount)+" random voxels on the CPU"); + //timer.start("Creating "+std::to_string(voxelCount)+" random voxels on the CPU"); while (voxels.size() < voxelCount) voxels.push_back(nanovdb::Coord(op(), op(), op())); - timer.stop(); + //timer.stop(); EXPECT_EQ(voxelCount, voxels.size()); } #if 0 {// Build grid on CPU nanovdb::build::Grid buildGrid(0.0f); - timer.start("Building grid on CPU from "+std::to_string(voxels.size())+" points"); + //timer.start("Building grid on CPU from "+std::to_string(voxels.size())+" points"); nanovdb::forEach(0, voxelCount, voxelCount >> 6, [&](const nanovdb::Range1D &r){ auto acc = buildGrid.getWriteAccessor(); for (size_t i=r.begin(); i!=r.end(); ++i) acc.setValueOn(voxels[i]); }); - timer.restart("Converting CPU build::Grid to nanovdb"); + //timer.restart("Converting CPU build::Grid to nanovdb"); auto handle = nanovdb::createNanoGrid(buildGrid); 
- timer.stop(); + //timer.stop(); } #endif nanovdb::Coord* d_coords; @@ -535,11 +535,11 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) cudaCheck(cudaMemcpy(d_coords, voxels.data(), voxelSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(voxels.size())+" points"); + //timer.start("Building grid on GPU from "+std::to_string(voxels.size())+" points"); nanovdb::CudaPointsToGrid converter; //converter.setVerbose(); auto handle = converter.getHandle(d_coords, voxelCount); - timer.stop(); + //timer.stop(); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU EXPECT_TRUE(handle.deviceGrid()); @@ -584,7 +584,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) size_t size1 = 0, size2 = 0; std::vector> handles1, handles2; std::vector gridNames; - nanovdb::CpuTimer timer("create 5 host grids"); + //nanovdb::CpuTimer timer("create 5 host grids"); for (int radius = 100; radius<150; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); handles1.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, @@ -594,7 +594,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) } EXPECT_EQ(5u, gridNames.size()); EXPECT_EQ(5u, handles1.size()); - timer.restart("create 5 host grids"); + //timer.restart("create 5 host grids"); for (int radius = 150; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); handles2.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, @@ -603,7 +603,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) } EXPECT_EQ(10u, gridNames.size()); EXPECT_EQ( 5u, handles2.size()); - timer.restart("merging 5 host grids"); + //timer.restart("merging 5 host grids"); auto mergedHandle = nanovdb::mergeGrids(handles2);// merge last 5 grid handles EXPECT_EQ(size2, mergedHandle.size()); EXPECT_FALSE(mergedHandle.isPadded()); @@ -613,7 +613,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) EXPECT_EQ(5u, gridData->mGridCount); EXPECT_EQ(0u, 
gridData->mGridIndex); EXPECT_EQ(handles2[0].size(), gridData->mGridSize); - timer.restart("unit-test host grids"); + //timer.restart("unit-test host grids"); for (int i=0; i<5; ++i){ gridData = mergedHandle.gridData(i); EXPECT_TRUE(gridData); @@ -626,7 +626,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) handles1.push_back(std::move(mergedHandle));// append one handle with 5 merged grids EXPECT_TRUE(mergedHandle.empty()); EXPECT_EQ(6u, handles1.size()); - timer.restart("merging 10 host grids"); + //timer.restart("merging 10 host grids"); mergedHandle = nanovdb::mergeGrids(handles1); EXPECT_EQ(size1 + size2, mergedHandle.size()); EXPECT_TRUE(mergedHandle.data()); @@ -636,9 +636,9 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) EXPECT_EQ( 0u, gridData->mGridIndex); EXPECT_EQ(handles1[0].size(), gridData->mGridSize); - timer.restart("splitting host grids"); + //timer.restart("splitting host grids"); auto splitHandles = nanovdb::splitGrids(mergedHandle); - timer.restart("unit-test split grids"); + //timer.restart("unit-test split grids"); EXPECT_EQ(10u, splitHandles.size()); for (int i=0; i<5; ++i){ EXPECT_EQ(handles1[i].size(), splitHandles[i].size()); @@ -654,7 +654,7 @@ TEST(TestNanoVDBCUDA, mergeSplitGrids) EXPECT_EQ(1u, gridData->mGridCount); EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); } - timer.stop(); + //timer.stop(); }// mergeSplitGrids TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) @@ -664,7 +664,7 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) size_t size = 0; std::vector handles; std::vector gridNames; - nanovdb::CpuTimer timer("create 10 host grids"); + //nanovdb::CpuTimer timer("create 10 host grids"); for (int radius = 100; radius<200; radius += 10) { gridNames.emplace_back("sphere_" + std::to_string(radius)); handles.emplace_back(nanovdb::createLevelSetSphere(radius,nanovdb::Vec3d(0),1,3, @@ -672,16 +672,16 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) EXPECT_FALSE(handles.back().isPadded()); size += handles.back().size(); } - 
timer.restart("copy grids to device"); + //timer.restart("copy grids to device"); for (auto &h : handles) h.deviceUpload(); EXPECT_EQ(10u, handles.size()); - timer.restart("merging device grids"); + //timer.restart("merging device grids"); auto mergedHandle = nanovdb::mergeDeviceGrids(handles); EXPECT_EQ(size, mergedHandle.size()); EXPECT_FALSE(mergedHandle.data()); EXPECT_TRUE(mergedHandle.deviceData()); EXPECT_FALSE(mergedHandle.isPadded()); - timer.restart("copy grids to host"); + //timer.restart("copy grids to host"); mergedHandle.deviceDownload(); EXPECT_TRUE(mergedHandle.data()); EXPECT_TRUE(mergedHandle.deviceData()); @@ -690,16 +690,16 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) EXPECT_TRUE(gridData); EXPECT_EQ(10u, gridData->mGridCount); EXPECT_EQ(0u, gridData->mGridIndex); - timer.restart("unit-test host grids"); + //timer.restart("unit-test host grids"); for (uint32_t i=0; i<10; ++i) { gridData = mergedHandle.gridData(i); EXPECT_TRUE(gridData); EXPECT_EQ(i, gridData->mGridIndex); EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); } - timer.restart("splitting device grids"); + //timer.restart("splitting device grids"); auto splitHandles = nanovdb::splitDeviceGrids(mergedHandle); - timer.restart("unit-test split grids"); + //timer.restart("unit-test split grids"); EXPECT_EQ(10u, splitHandles.size()); for (uint32_t i=0u; i<10u; ++i) { EXPECT_EQ(handles[i].size(), splitHandles[i].size()); @@ -712,7 +712,7 @@ TEST(TestNanoVDBCUDA, mergeSplitDeviceGrids) EXPECT_EQ(1u, gridData->mGridCount); EXPECT_EQ(strcmp(gridNames[i].c_str(), gridData->mGridName),0); } - timer.stop(); + //timer.stop(); }// mergeSplitDeviceGrids // make -j 4 testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*Cuda*" --gtest_break_on_failure @@ -1018,7 +1018,7 @@ TEST(TestNanoVDBCUDA, OneVoxelToGrid) TEST(TestNanoVDBCUDA, ThreePointsToGrid) { - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; using GridT = nanovdb::NanoGrid; 
const size_t num_points = 3; @@ -1196,7 +1196,7 @@ TEST(TestNanoVDBCUDA, EightVoxelsToFloatGrid) TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) { - using BuildT = nanovdb::Points;//uint32_t; + using BuildT = nanovdb::Point;//uint32_t; using Vec3T = nanovdb::Vec3d; //nanovdb::CpuTimer timer; const size_t pointCount = 1 << 20;// 1048576 @@ -1309,7 +1309,7 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) { - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3d; //nanovdb::CpuTimer timer; const size_t pointCount = 1 << 20;// 1048576 @@ -1427,12 +1427,12 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) { - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - nanovdb::CpuTimer timer("Generate sphere with points"); + //nanovdb::CpuTimer timer("Generate sphere with points"); auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); - timer.stop(); + //timer.stop(); auto *pointGrid = pointsHandle.grid(); EXPECT_TRUE(pointGrid); @@ -1456,11 +1456,11 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); nanovdb::CudaPointsToGrid converter(pointGrid->map()); //converter.setVerbose(); auto handle = converter.getHandle(d_points, pointCount); - timer.stop(); + //timer.stop(); cudaCheck(cudaFree(d_points)); //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; @@ -1549,12 +1549,12 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32) { - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; 
using Vec3T = nanovdb::Vec3f; - nanovdb::CpuTimer timer("Generate sphere with points"); + //nanovdb::CpuTimer timer("Generate sphere with points"); auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); - timer.stop(); + //timer.stop(); auto *pointGrid = pointsHandle.grid(); EXPECT_TRUE(pointGrid); @@ -1578,14 +1578,14 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32) cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// nanovdb::CudaPointsToGrid converter(pointGrid->map()); //converter.setVerbose(); converter.setPointType(nanovdb::PointType::Voxel32); auto handle = converter.getHandle(d_points, pointCount); ///////////////////////////////////////////////////////////////////////// - timer.stop(); + //timer.stop(); cudaCheck(cudaFree(d_points)); //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; @@ -1681,12 +1681,12 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32) TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) { EXPECT_EQ(6u, sizeof(nanovdb::Vec3u16)); - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - nanovdb::CpuTimer timer("Generate sphere with points"); + //nanovdb::CpuTimer timer("Generate sphere with points"); auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); - timer.stop(); + //timer.stop(); auto *pointGrid = pointsHandle.grid(); EXPECT_TRUE(pointGrid); @@ -1710,14 +1710,14 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + //timer.start("Building 
grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// nanovdb::CudaPointsToGrid converter(pointGrid->map()); //converter.setVerbose(); converter.setPointType(nanovdb::PointType::Voxel16); auto handle = converter.getHandle(d_points, pointCount); ///////////////////////////////////////////////////////////////////////// - timer.stop(); + //timer.stop(); cudaCheck(cudaFree(d_points)); //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; @@ -1806,16 +1806,16 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) { EXPECT_EQ(3u, sizeof(nanovdb::Vec3u8)); - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - nanovdb::CpuTimer timer("Generate sphere with points"); + //nanovdb::CpuTimer timer("Generate sphere with points"); auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); - timer.stop(); + //timer.stop(); auto *pointGrid = pointsHandle.grid(); EXPECT_TRUE(pointGrid); - std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + //std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; nanovdb::PointAccessor acc2(*pointGrid); EXPECT_TRUE(acc2); const Vec3T *begin, *end; @@ -1836,7 +1836,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// //auto handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); nanovdb::CudaPointsToGrid converter(pointGrid->map()); @@ -1844,7 +1844,7 @@ TEST(TestNanoVDBCUDA, 
Sphere_CudaPointsToGrid_Voxel8) converter.setPointType(nanovdb::PointType::Voxel8); auto handle = converter.getHandle(d_points, pointCount); ///////////////////////////////////////////////////////////////////////// - timer.stop(); + //timer.stop(); cudaCheck(cudaFree(d_points)); //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; @@ -1873,7 +1873,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - std::cerr << grid->indexBBox() << std::endl; + //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel8: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -1933,16 +1933,16 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) { EXPECT_EQ(3u, sizeof(nanovdb::Vec3u8)); - using BuildT = nanovdb::Points; + using BuildT = nanovdb::Point; using Vec3T = nanovdb::Vec3f; - nanovdb::CpuTimer timer("Generate sphere with points"); + //nanovdb::CpuTimer timer("Generate sphere with points"); auto pointsHandle = nanovdb::createPointSphere(8, 100.0, nanovdb::Vec3d(0.0), 0.5); - timer.stop(); + //timer.stop(); auto *pointGrid = pointsHandle.grid(); EXPECT_TRUE(pointGrid); - std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; + //std::cerr << "nanovdb::bbox = " << pointGrid->indexBBox() << " voxel count = " << pointGrid->activeVoxelCount() << std::endl; nanovdb::PointAccessor acc2(*pointGrid); EXPECT_TRUE(acc2); const Vec3T *begin, *end; @@ -1963,15 +1963,15 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) cudaCheck(cudaMemcpy(d_points, begin, pointSize, cudaMemcpyHostToDevice)); //timer.stop(); - timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); + //timer.start("Building grid on GPU from "+std::to_string(pointCount)+" points"); ///////////////////////////////////////////////////////////////////////// //auto 
handle = nanovdb::cudaPointsToGrid(d_points, pointCount, nanovdb::PointType::Voxel8); nanovdb::CudaPointsToGrid converter(pointGrid->map()); - converter.setVerbose(2); + //converter.setVerbose(2); converter.setPointType(nanovdb::PointType::PointID); auto handle = converter.getHandle(d_points, pointCount); ///////////////////////////////////////////////////////////////////////// - timer.stop(); + //timer.stop(); cudaCheck(cudaFree(d_points)); //std::cerr << "Grid size: " << (handle.size() >> 20) << " MB" << std::endl; @@ -2055,7 +2055,7 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) TEST(TestNanoVDBCUDA, NanoGrid_Rgba8) { using BuildT = nanovdb::Rgba8; - using GridT = nanovdb::NanoGrid; + using GridT = nanovdb::NanoGrid; const size_t num_points = 1; nanovdb::Coord coords[num_points] = {nanovdb::Coord(1, 2, 3)}, *d_coords = nullptr; cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); @@ -2112,10 +2112,10 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) cudaCheck(cudaMalloc(&d_blind, num_points * sizeof(float))); cudaCheck(cudaMemcpy(d_blind, blind, num_points * sizeof(float), cudaMemcpyHostToDevice));// CPU -> GPU - nanovdb::GpuTimer timer("cudaAddBlindData"); + //nanovdb::GpuTimer timer("cudaAddBlindData"); auto handle2 = nanovdb::cudaAddBlindData(d_grid, d_blind, num_points); cudaCheck(cudaFree(d_blind)); - timer.stop(); + //timer.stop(); EXPECT_TRUE(handle2.deviceData());// grid only exists on the GPU EXPECT_FALSE(handle2.data());// no grid was yet allocated on the CPU EXPECT_EQ(handle2.size(), handle.size() + sizeof(nanovdb::GridBlindMetaData) + nanovdb::AlignUp(num_points*sizeof(float))); diff --git a/nanovdb/nanovdb/unittest/TestOpenVDB.cc b/nanovdb/nanovdb/unittest/TestOpenVDB.cc index 8c81221e31..aa93dd5767 100644 --- a/nanovdb/nanovdb/unittest/TestOpenVDB.cc +++ b/nanovdb/nanovdb/unittest/TestOpenVDB.cc @@ -2678,7 +2678,7 @@ TEST_F(TestOpenVDB, BuildNodeManager) } }// BuildNodeManager -#if 1 +#if 0// toggle to enable benchmark tests 
class NanoPointList { diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h index 2ca81c72c5..d2823ad2a7 100644 --- a/nanovdb/nanovdb/util/CreateNanoGrid.h +++ b/nanovdb/nanovdb/util/CreateNanoGrid.h @@ -850,8 +850,7 @@ struct CreateNanoGrid::BlindMetaData metaData->mValueSize = valueSize; NANOVDB_ASSERT(metaData->isValid()); } - - ~BlindMetaData(){ delete metaData;} + ~BlindMetaData(){ delete [] reinterpret_cast(metaData); } bool operator<(const BlindMetaData& other) const { return order < other.order; } // required by std::set static GridType mapToType(const std::string& name) { diff --git a/nanovdb/nanovdb/util/GridBuilder.h b/nanovdb/nanovdb/util/GridBuilder.h index 6468b7e414..30fba27f94 100644 --- a/nanovdb/nanovdb/util/GridBuilder.h +++ b/nanovdb/nanovdb/util/GridBuilder.h @@ -618,7 +618,7 @@ struct InternalNode static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; //dimension in index space static constexpr uint32_t DIM = 1u << TOTAL; static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); //number of tile values (or child pointers) - static constexpr int32_t MASK = DIM - 1; + static constexpr uint32_t MASK = DIM - 1; static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using MaskT = Mask; @@ -754,9 +754,9 @@ struct InternalNode static uint32_t CoordToOffset(const Coord& ijk) { - return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) + - (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) + - ((ijk[2] & MASK) >> ChildT::TOTAL); + return (((ijk[0] & int32_t(MASK)) >> ChildT::TOTAL) << (2 * LOG2DIM)) + + (((ijk[1] & int32_t(MASK)) >> ChildT::TOTAL) << (LOG2DIM)) + + ((ijk[2] & int32_t(MASK)) >> ChildT::TOTAL); } static Coord OffsetToLocalCoord(uint32_t n) @@ -1099,7 +1099,7 @@ struct LeafNode static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t 
DIM = 1u << TOTAL; static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node - static constexpr int32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations static constexpr uint32_t LEVEL = 0; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using NodeMaskType = Mask; @@ -1195,14 +1195,16 @@ struct LeafNode /// @brief Return the linear offset corresponding to the given coordinate static uint32_t CoordToOffset(const Coord& ijk) { - return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); } static Coord OffsetToLocalCoord(uint32_t n) { NANOVDB_ASSERT(n < SIZE); const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); - return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); } void localToGlobalCoord(Coord& ijk) const @@ -1292,7 +1294,7 @@ struct LeafNode static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node - static constexpr int32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations static constexpr uint32_t LEVEL = 0; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using NodeMaskType = Mask; @@ -1382,14 +1384,16 @@ struct LeafNode /// @brief Return the linear offset corresponding to the given coordinate static uint32_t CoordToOffset(const Coord& ijk) { - return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); + return ((ijk[0] 
& int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); } static Coord OffsetToLocalCoord(uint32_t n) { NANOVDB_ASSERT(n < SIZE); const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); - return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); } void localToGlobalCoord(Coord& ijk) const {ijk += mOrigin;} @@ -1462,7 +1466,7 @@ struct LeafNode static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node - static constexpr int32_t MASK = DIM - 1; // mask for bit operations + static constexpr uint32_t MASK = DIM - 1; // mask for bit operations static constexpr uint32_t LEVEL = 0; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node using NodeMaskType = Mask; @@ -1553,14 +1557,16 @@ struct LeafNode /// @brief Return the linear offset corresponding to the given coordinate static uint32_t CoordToOffset(const Coord& ijk) { - return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); + return ((ijk[0] & int32_t(MASK)) << (2 * LOG2DIM)) + + ((ijk[1] & int32_t(MASK)) << LOG2DIM) + + (ijk[2] & int32_t(MASK)); } static Coord OffsetToLocalCoord(uint32_t n) { NANOVDB_ASSERT(n < SIZE); const int32_t m = n & ((1 << 2 * LOG2DIM) - 1); - return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & MASK); + return Coord(n >> 2 * LOG2DIM, m >> LOG2DIM, m & int32_t(MASK)); } void localToGlobalCoord(Coord& ijk) const @@ -1683,9 +1689,9 @@ struct ValueAccessor template bool isCached(const Coord& ijk) const { - return (ijk[0] & ~NodeT::MASK) == mKeys[NodeT::LEVEL][0] && - (ijk[1] & ~NodeT::MASK) == mKeys[NodeT::LEVEL][1] && - (ijk[2] & ~NodeT::MASK) == mKeys[NodeT::LEVEL][2]; + return (ijk[0] & int32_t(~NodeT::MASK)) == 
mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; } template diff --git a/nanovdb/nanovdb/util/GridHandle.h b/nanovdb/nanovdb/util/GridHandle.h index 511f5071ce..bce616c89a 100644 --- a/nanovdb/nanovdb/util/GridHandle.h +++ b/nanovdb/nanovdb/util/GridHandle.h @@ -18,11 +18,7 @@ #include #include #include // for mapToGridType -#include "HostBuffer.h" - -#if defined(__CUDACC__) -#include "cuda/CudaUtils.h"// for cudaCheckError(); -#endif +#include namespace nanovdb { @@ -45,10 +41,17 @@ class GridHandle public: using BufferType = BufferT; - /// @brief Move constructor from a buffer + /// @brief Move constructor from a host buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); + + /// @brief Move constructor from a dual host-device buffer /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! 
- GridHandle(BufferT&& buffer); + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); /// @brief Constructs an empty GridHandle GridHandle() = default; @@ -218,33 +221,18 @@ inline __hostdev__ void cpyMetaData(const GridData *data, GridHandleMetaData *me data = PtrAdd(data, p->size); } } -#if defined(__CUDACC__) -__global__ void cudaCpyMetaData(const GridData *data, GridHandleMetaData *meta){cpyMetaData(data, meta);} -#endif }// anonymous namespace template -GridHandle::GridHandle(BufferT&& buffer) +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) { + static_assert(is_same::value, "Expected U==BufferT"); mBuffer = std::move(buffer); if (auto *data = reinterpret_cast(mBuffer.data())) { if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); mMetaData.resize(data->mGridCount); cpyMetaData(data, mMetaData.data()); -#if defined(__CUDACC__) - } else if constexpr(BufferTraits::hasDeviceDual) { - if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { - GridData tmp; - cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); - if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); - GridHandleMetaData *d_metaData; - cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); - cudaCpyMetaData<<<1,1>>>(d_data, d_metaData); - mMetaData.resize(tmp.mGridCount); - cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); - cudaCheck(cudaFree(d_metaData)); - } -#endif } } @@ -285,9 +273,6 @@ inline __hostdev__ void updateGridData(GridData *data, uint32_t gridIndex, uint3 data->mGridIndex = gridIndex; data->mGridCount = gridCount; } -#if defined(__CUDACC__) -__global__ void cudaUpdateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount){updateGridData(data, gridIndex, gridCount);} -#endif }// anonymous namespace template class 
VectorT = std::vector> @@ -337,56 +322,10 @@ mergeGrids(const VectorT> &handles, const BufferT* other = n return GridHandle(std::move(buffer)); }// mergeGrids -#if defined(__CUDACC__) - -template class VectorT = std::vector> -inline typename enable_if::hasDeviceDual, VectorT>>::type -splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr) -{ - const uint8_t *ptr = handle.deviceData(); - if (ptr == nullptr) return VectorT>(); - VectorT> handles(handle.gridCount()); - for (uint32_t n=0; n(buffer.deviceData()); - const GridData *src = reinterpret_cast(ptr); - cudaCheck(cudaMemcpy(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice)); - cudaUpdateGridData<<<1,1>>>(dst, 0u, 1u); - cudaCheckError(); - handles[n] = GridHandle(std::move(buffer)); - ptr += handle.gridSize(n); - } - return std::move(handles); -}// splitDeviceGrids - -template class VectorT> -inline typename enable_if::hasDeviceDual, GridHandle>::type -mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr) -{ - uint64_t size = 0u; - uint32_t counter = 0u, gridCount = 0u; - for (auto &h : handles) { - gridCount += h.gridCount(); - for (uint32_t n=0; n(dst); - cudaUpdateGridData<<<1,1>>>(data, counter++, gridCount); - cudaCheckError(); - dst += h.gridSize(n); - src += h.gridSize(n); - } - } - return GridHandle(std::move(buffer)); -}// mergeDeviceGrids +} // namespace nanovdb +#if defined(__CUDACC__) +#include #endif// defined(__CUDACC__) -} // namespace nanovdb - #endif // NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/PrefixSum.h b/nanovdb/nanovdb/util/PrefixSum.h index b360579745..b08ee11d43 100644 --- a/nanovdb/nanovdb/util/PrefixSum.h +++ b/nanovdb/nanovdb/util/PrefixSum.h @@ -10,9 +10,9 @@ \brief Multi-threaded implementations of inclusive prefix sum - \note An exclusive prefix sum is simply an array starting with zero - followed by the elements in the inclusive prefix sum, minus its - last entry which is the sum of all the input elements. 
+ \note An exclusive prefix sum is simply an array starting with zero + followed by the elements in the inclusive prefix sum, minus its + last entry which is the sum of all the input elements. */ #ifndef NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED @@ -44,27 +44,27 @@ template void inclusiveScan(T *array, size_t size, const T &identity, bool threaded, Op op) { #ifndef NANOVDB_USE_TBB - threaded = false; - (void)identity;// avoids compiler warning + threaded = false; + (void)identity;// avoids compiler warning #endif if (threaded) { #ifdef NANOVDB_USE_TBB - using RangeT = tbb::blocked_range; - tbb::parallel_scan(RangeT(0, size), identity, - [&](const RangeT &r, T sum, bool is_final_scan)->T { - T tmp = sum; - for (size_t i = r.begin(); i < r.end(); ++i) { - tmp = op(tmp, array[i]); - if (is_final_scan) array[i] = tmp; - } - return tmp; - },[&](const T &a, const T &b) {return op(a, b);} - ); + using RangeT = tbb::blocked_range; + tbb::parallel_scan(RangeT(0, size), identity, + [&](const RangeT &r, T sum, bool is_final_scan)->T { + T tmp = sum; + for (size_t i = r.begin(); i < r.end(); ++i) { + tmp = op(tmp, array[i]); + if (is_final_scan) array[i] = tmp; + } + return tmp; + },[&](const T &a, const T &b) {return op(a, b);} + ); #endif - } else { // serial inclusive prefix operation - for (size_t i=1; i diff --git a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.h b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh similarity index 90% rename from nanovdb/nanovdb/util/cuda/CudaAddBlindData.h rename to nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh index de46595a24..42a6e7f20c 100644 --- a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.h +++ b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh @@ -1,8 +1,21 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#ifndef NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED -#define NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED +/*! 
+ \file CudaAddBlindData.cuh + + \author Ken Museth + + \date August 3, 2023 + + \brief Defines function that appends blind device data to an existing device NanoGrid + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED #include #include "CudaDeviceBuffer.h" @@ -91,4 +104,4 @@ cudaAddBlindData(const NanoGrid *d_grid, }// nanovdb namespace -#endif // NVIDIA_CUDA_ADD_BLIND_DATA_H_HAS_BEEN_INCLUDED \ No newline at end of file +#endif // NVIDIA_CUDA_ADD_BLIND_DATA_CUH_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh new file mode 100644 index 0000000000..5dc3adf2fd --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh @@ -0,0 +1,116 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! 
+ \file CudaGridHandle.cuh + + \author Ken Museth, Doyub Kim + + \date August 3, 2023 + + \brief Contains cuda kernels for GridHandle + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED + +#include "CudaDeviceBuffer.h"// required for instantiation of move c-tor of GridHandle +#include "../GridHandle.h" + +namespace nanovdb { + +namespace {// anonymous namespace +__global__ void cudaCpyMetaData(const GridData *data, GridHandleMetaData *meta){cpyMetaData(data, meta);} +__global__ void cudaUpdateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount){updateGridData(data, gridIndex, gridCount);} +}// anonymous namespace + +template +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) +{ + static_assert(is_same::value, "Expected U==BufferT"); + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyMetaData(data, mMetaData.data()); + } else { + if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { + GridData tmp; + cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); + if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); + GridHandleMetaData *d_metaData; + cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); + cudaCpyMetaData<<<1,1>>>(d_data, d_metaData); + mMetaData.resize(tmp.mGridCount); + cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_metaData)); + } + } +}// GridHandle(T&& buffer) + +// Dummy function that ensures instantiation of the move-constructor above when 
BufferT=CudaDeviceBuffer +namespace {auto __dummy(){return GridHandle(std::move(CudaDeviceBuffer()));}} + +template class VectorT = std::vector> +inline typename enable_if::hasDeviceDual, VectorT>>::type +cudaSplitGridHandles(const GridHandle &handle, const BufferT* other = nullptr) +{ + const uint8_t *ptr = handle.deviceData(); + if (ptr == nullptr) return VectorT>(); + VectorT> handles(handle.gridCount()); + for (uint32_t n=0; n(buffer.deviceData()); + const GridData *src = reinterpret_cast(ptr); + cudaCheck(cudaMemcpy(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice)); + cudaUpdateGridData<<<1,1>>>(dst, 0u, 1u); + cudaCheckError(); + handles[n] = GridHandle(std::move(buffer)); + ptr += handle.gridSize(n); + } + return std::move(handles); +}// cudaSplitGridHandles + +template class VectorT = std::vector> +inline typename enable_if::hasDeviceDual, VectorT>>::type +splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr) +{ return cudaSplitGridHandles(handle, other); } + +template class VectorT> +inline typename enable_if::hasDeviceDual, GridHandle>::type +cudaMergeGridHandles(const VectorT> &handles, const BufferT* other = nullptr) +{ + uint64_t size = 0u; + uint32_t counter = 0u, gridCount = 0u; + for (auto &h : handles) { + gridCount += h.gridCount(); + for (uint32_t n=0; n(dst); + cudaUpdateGridData<<<1,1>>>(data, counter++, gridCount); + cudaCheckError(); + dst += h.gridSize(n); + src += h.gridSize(n); + } + } + return GridHandle(std::move(buffer)); +}// cudaMergeGridHandles + +template class VectorT> +inline typename enable_if::hasDeviceDual, GridHandle>::type +mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr) +{ return cudaMergeGridHandles(handles, other); } + +} // namespace nanovdb + +#endif // NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh similarity index 97% rename from 
nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h rename to nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh index 2dda21e944..9d53dca4ca 100644 --- a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.h +++ b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh @@ -2,28 +2,29 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file CudaIndexToGrid.h + \file CudaIndexToGrid.cuh \author Ken Museth \date April 17, 2023 \brief Combines an IndexGrid and values into a regular Grid on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) */ -#ifndef NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED -#define NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED +#ifndef NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED #include #include "CudaDeviceBuffer.h" #include -#include +#include #include namespace nanovdb { -// cudeIndexGridToGrid - /// @brief Freestanding function that combines an IndexGrid and values into a regular Grid /// @tparam DstBuildT Build time of the destination/output Grid /// @tparam SrcBuildT Build type of the source/input IndexGrid @@ -78,7 +79,7 @@ class CudaIndexToGrid private: GpuTimer mTimer; std::string mGridName; - bool mVerbose{true}; + bool mVerbose{false}; NodeAccessor mNodeAcc, *mDevNodeAcc; template @@ -367,4 +368,4 @@ cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValu }// nanovdb namespace -#endif // NVIDIA_CUDA_INDEX_TO_GRID_H_HAS_BEEN_INCLUDED +#endif // NVIDIA_CUDA_INDEX_TO_GRID_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh similarity index 94% rename from nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h rename to nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index ad984b55a0..058ef5cc58 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.h +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -1,8 +1,17 @@ // Copyright Contributors to the 
OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -#ifndef NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED -#define NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED +/*! + \file CudaPointsToGrid.cuh + + \brief Generates NanoVDB grids from a list of voxels or points on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED #include #include @@ -12,23 +21,17 @@ #include #include "CudaDeviceBuffer.h" #include -#include +#include #include /* -Notes: - -Improvements: no limit on domain size, avoid random access in root node, removed offsetInLeafNode array - -make -j testNanoVDB && ./unittest/testNanoVDB --gtest_filter="*CudaPointsToGrid*" --gtest_break_on_failure --gtest_repeat=3 - -4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! +Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! */ namespace nanovdb { // Define the type used when the points are encoded as blind data in the output grid -enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Points +enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Point PointID = 1,// linear index of type uint32_t to points World64 = 2,// Vec3d in world space World32 = 3,// Vec3f in world space @@ -53,7 +56,7 @@ enum class PointType : uint32_t { Disable = 0,// no point information e.g. when /// @param type Defined the way point information is represented in the output grid (see PointType enum above) /// Should not be PointType::Disable! /// @param buffer Instance of the device buffer used for memory allocation -/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, +/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. 
coordinates, /// are represented as blind data defined by @c type. template GridHandle @@ -205,7 +208,7 @@ class CudaPointsToGrid /// @brief Constructor from a Map /// @param map Map to be used for the output device grid - CudaPointsToGrid(const Map &map) : mPointType(is_same::value ? PointType::Default : PointType::Disable){ + CudaPointsToGrid(const Map &map) : mPointType(is_same::value ? PointType::Default : PointType::Disable){ mData.map = map; mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); cudaCheck(cudaMalloc((void**)&mDeviceData, sizeof(Data))); @@ -231,8 +234,8 @@ class CudaPointsToGrid /// @param name name of the output grid void setGridName(const std::string &name) {mGridName = name;} - // only available when BuildT == Points - template typename enable_if::value>::type + // only available when BuildT == Point + template typename enable_if::value>::type setPointType(PointType type) { mPointType = type; } /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space @@ -266,10 +269,10 @@ class CudaPointsToGrid void processBBox(); - // the following methods are only defined when BuildT == Points - template typename enable_if::value, uint32_t>::type + // the following methods are only defined when BuildT == Point + template typename enable_if::value, uint32_t>::type maxPointsPerVoxel() const {return mMaxPointsPerVoxel;} - template typename enable_if::value, uint32_t>::type + template typename enable_if::value, uint32_t>::type maxPointsPerLeaf() const {return mMaxPointsPerLeaf;} private: @@ -406,26 +409,26 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t auto *d_indx = mMemPool.template alloc(pointCount); if (mVerbose==2) mTimer.restart("Generate tile keys"); - if (is_same::value) {// points in world space - if (is_same::value) { + if constexpr(is_same::value) {// points in world space + if constexpr(is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] 
__device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); + d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); }, mDeviceData); cudaCheckError(); - } else if (is_same::value) { + } else if constexpr(is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); + d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); }, mDeviceData); cudaCheckError(); } else { - throw std::runtime_error("Points (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + throw std::runtime_error("Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); } - } else if (is_same::value) { + } else if constexpr(is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid]); }, mDeviceData); cudaCheckError(); - } else if (is_same::value || is_same::value) { + } else if constexpr(is_same::value || is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid].round()); @@ -465,7 +468,7 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { tid += offset; Vec3T p = d_points[d_indx[tid]]; - if constexpr(is_same::value) p = is_same::value ? d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); + if constexpr(is_same::value) p = is_same::value ? 
d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); d_keys[tid] = voxelKey(id, p.round()); }, mDeviceData); cudaCheckError(); CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 @@ -482,7 +485,7 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t cudaCheck(cudaMemcpy(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); mMemPool.free(d_voxel_count); - if constexpr(is_same::value) { + if constexpr(is_same::value) { if (mVerbose==2) mTimer.restart("Count max points per voxel"); uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(1); CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); @@ -499,7 +502,7 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); cudaCheck(cudaMemcpy(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - if constexpr(is_same::value) { + if constexpr(is_same::value) { uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(1); CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); cudaCheck(cudaMemcpy(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost)); @@ -594,7 +597,7 @@ inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d // process Grid auto &grid = d_data->getGrid(); grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, mapToGridType()); - grid.mBlindMetadataCount = is_same::value;// ? 1u : 0u; + grid.mBlindMetadataCount = is_same::value;// ? 
1u : 0u; grid.mBlindMetadataOffset = d_data->meta; if (pointType != PointType::Disable) { const auto lastLeaf = tree.mNodeCount[0] - 1; @@ -782,11 +785,7 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po auto &valueMask = leaf.mValueMask; valueMask.setOff();// initiate all bits to off - //for (uint64_t *ptr=d_data->d_keys+d_data->pointsPerLeafPrefix[tid], *end=ptr+d_data->pointsPerLeaf[tid]; ptr!=end; ++ptr) { - // valueMask.setOn(*ptr & uint64_t(511)); - //} - - if constexpr(is_same::value) { + if constexpr(is_same::value) { leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; leaf.mPointCount = d_data->pointsPerLeaf[tid]; } else if constexpr(BuildTraits::is_offindex) { @@ -808,7 +807,7 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); const uint32_t n = voxelKey & 511u; leaf.mValueMask.setOnAtomic(n);// <--- slow! - if constexpr(is_same::value) { + if constexpr(is_same::value) { leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); } else if constexpr(!BuildTraits::is_special) { leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) @@ -818,13 +817,12 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); if (mVerbose==2) mTimer.restart("set inactive voxel values"); - //if constexpr(is_same::value) {// set inactive voxel values when BuildT == Points const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; cudaLambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { auto &leaf = d_data->getLeaf(tid >> 9u); const uint32_t n = tid & 511u; if (leaf.mValueMask.isOn(n)) return; - if constexpr(is_same::value) { + if constexpr(is_same::value) { const uint32_t m = leaf.mValueMask.findPrev(n - 1); leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; } else if constexpr(!BuildTraits::is_special) { @@ -882,14 +880,14 @@ inline void CudaPointsToGrid::processPoints(const Vec3T *d_point //================================================================================================ -// Template specialization with BuildT = Points +// Template specialization with BuildT = Point template <> template -inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) +inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) { switch (mPointType){ case PointType::Disable: - throw std::runtime_error("CudaPointsToGrid::processPoints: mPointType == PointType::Disable\n"); + throw std::runtime_error("CudaPointsToGrid::processPoints: mPointType == PointType::Disable\n"); case PointType::PointID: cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { d_data->template getPoint(tid) = d_data->d_indx[tid]; @@ -936,10 +934,10 @@ inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_ }, mDeviceData); cudaCheckError(); break; default: - printf("Internal error in CudaPointsToGrid::processPoints\n"); + printf("Internal error in CudaPointsToGrid::processPoints\n"); } mMemPool.free(mData.d_indx); -}// CudaPointsToGrid::processPoints +}// CudaPointsToGrid::processPoints //================================================================================================ @@ -1001,10 +999,10 @@ inline void CudaPointsToGrid::processBBox() //================================================================================================ template -GridHandle// Grid with PointType coordinates as blind data +GridHandle// Grid with PointType coordinates as blind data cudaPointsToGrid(const Vec3T* d_xyz, int pointCount, double voxelSize, PointType type, BufferT &buffer) { - CudaPointsToGrid converter(voxelSize); + CudaPointsToGrid converter(voxelSize); converter.setPointType(type); return converter.getHandle(d_xyz, pointCount, buffer); } 
@@ -1043,4 +1041,4 @@ cudaVoxelsToGrid(std::vector> vec, const }// nanovdb namespace -#endif // NVIDIA_CUDA_POINTS_TO_GRID_H_HAS_BEEN_INCLUDED +#endif // NVIDIA_CUDA_POINTS_TO_GRID_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh similarity index 93% rename from nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h rename to nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh index ec9dd24caf..0b38d13486 100644 --- a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.h +++ b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file CudaSignedFloodFill.h + \file CudaSignedFloodFill.cuh \author Ken Museth @@ -14,14 +14,17 @@ needs to be modified during the signed flood fill operation. This happens when the root-table needs to be expanded with tile values (of size 4096^3) that are completely inside the implicit surface. + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) */ -#ifndef NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED -#define NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED #include #include -#include +#include #include namespace nanovdb { @@ -153,7 +156,7 @@ void CudaSignedFloodFill::operator()(NanoGrid *d_grid) cudaCheck(cudaFree(d_count)); static const int threadsPerBlock = 128; - auto blocksPerGrid = [&](size_t count){return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + auto blocksPerGrid = [&](size_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; auto *tree = reinterpret_cast*>(d_grid + 1); if (mVerbose) mTimer.start("\nProcess leaf nodes"); @@ -187,4 +190,4 @@ cudaSignedFloodFill(NanoGrid *d_grid, bool verbose) }// nanovdb namespace -#endif // 
NANOVDB_CUDA_SIGNED_FLOOD_FILL_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_CUDA_SIGNED_FLOOD_FILL_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h index b4a377b227..e007e91e73 100644 --- a/nanovdb/nanovdb/util/cuda/CudaUtils.h +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -114,6 +114,6 @@ __device__ inline bool cudaStrEq(const char *lhs, const char *rhs) return cudaStrcmp(lhs, rhs) == 0; } -#endif +#endif// __CUDACC__ #endif// NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/nanovdb/nanovdb/util/cuda/GpuTimer.h b/nanovdb/nanovdb/util/cuda/GpuTimer.cuh similarity index 84% rename from nanovdb/nanovdb/util/cuda/GpuTimer.h rename to nanovdb/nanovdb/util/cuda/GpuTimer.cuh index 435dbb1f3e..416ca403da 100644 --- a/nanovdb/nanovdb/util/cuda/GpuTimer.h +++ b/nanovdb/nanovdb/util/cuda/GpuTimer.cuh @@ -1,14 +1,14 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file GpuTimer.h +/// @file GpuTimer.cuh /// /// @author Ken Museth /// /// @brief A simple GPU timing class -#ifndef NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED -#define NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED #include // for std::cerr #include @@ -67,6 +67,16 @@ class GpuTimer this->start(stream); } + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + void start(const char* msg, void* stream = nullptr, std::ostream& os = std::cerr) + { + os << msg << " ... 
" << std::flush; + this->start(stream); + } + /// @brief elapsed time (since start) in miliseconds /// @param stream CUDA stream to be timed (defaults to stream 0) /// @return elapsed time (since start) in miliseconds @@ -101,4 +111,4 @@ class GpuTimer } // namespace nanovdb -#endif // NANOVDB_GPU_TIMER_HAS_BEEN_INCLUDED +#endif // NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED From 07089f2cb5dc73046bf545a30543935d064e5a52 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 09:25:42 -0700 Subject: [PATCH 08/49] fixed build issue of nanovdb benchmarks Signed-off-by: Ken Museth --- nanovdb/nanovdb/examples/benchmark/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt b/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt index d7a06d8930..967b5299ee 100644 --- a/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt +++ b/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt @@ -36,13 +36,13 @@ endif() # launching from the cmake binary working directory. 
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/data") -set(BENCHMARK_SOURCES Benchmark.cc Camera.h Image.h) +set(BENCHMARK_SOURCES TestBenchmark.cc Camera.h Image.h) if(NANOVDB_USE_CUDA) - list(APPEND BENCHMARK_SOURCES BenchKernels_nano.cu) + list(APPEND BENCHMARK_SOURCES TestBenchmark.cu BenchKernels_nano.cu) - add_executable(benchmark_nano Benchmark_nano.cc BenchKernels_nano.cu) - add_executable(benchmark_dense Benchmark_dense.cc BenchKernels_dense.cu) + add_executable(benchmark_nano Benchmark_nano.cu BenchKernels_nano.cu) + add_executable(benchmark_dense Benchmark_dense.cu BenchKernels_dense.cu) target_link_libraries(benchmark_nano PRIVATE nanovdb) target_link_libraries(benchmark_dense PRIVATE nanovdb) From ebd750a7ae34185729e311b5c08ca647d670576b Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 09:43:01 -0700 Subject: [PATCH 09/49] snapshot Signed-off-by: Ken Museth --- .../ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu index 2c34a53aee..efb8b12879 100644 --- a/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu +++ b/nanovdb/nanovdb/examples/ex_voxels_to_grid_cuda/ex_voxels_to_grid_cuda.cu @@ -27,13 +27,13 @@ int main() // Launch a device kernel that sets the values of voxels define above and prints them const unsigned int numThreads = 128, numBlocks = (numVoxels + numThreads - 1) / numThreads; cudaLambdaKernel<<>>(numVoxels, [=] __device__(size_t tid) { - using OpT = SetVoxel;// defined to type of random-access operation (set value) + using OpT = SetVoxel;// defines type of random-access operation (set value) const Coord &ijk = d_coords[tid]; - grid->tree().set(ijk, d_values[tid]); + grid->tree().set(ijk, d_values[tid]);// normally one should use a ValueAccessor printf("GPU: voxel # %lu, 
grid(%4i,%4i,%4i) = %5.1f\n", tid, ijk[0], ijk[1], ijk[2], grid->tree().getValue(ijk)); }); cudaCheckError(); - // Copy grid from GPU to CPU and print the voxel values + // Copy grid from GPU to CPU and print the voxel values for validation handle.deviceDownload();// creates a copy on the CPU grid = handle.grid(); for (size_t i=0; i Date: Fri, 4 Aug 2023 09:55:12 -0700 Subject: [PATCH 10/49] updated CMakeLists.txt Signed-off-by: Ken Museth --- doc/nanovdb/SourceTree.md | 34 ++++++++++++++++++---------------- nanovdb/nanovdb/CMakeLists.txt | 11 ++++++----- pendingchanges/nanovdb.txt | 25 +++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 21 deletions(-) create mode 100644 pendingchanges/nanovdb.txt diff --git a/doc/nanovdb/SourceTree.md b/doc/nanovdb/SourceTree.md index c2bb2f3023..6eb0a1dcc9 100644 --- a/doc/nanovdb/SourceTree.md +++ b/doc/nanovdb/SourceTree.md @@ -33,13 +33,14 @@ foo@bar:~$ tree │ ├── benchmark │ │ ├── BenchKernels_dense.cu │ │ ├── BenchKernels_nano.cu -│ │ ├── Benchmark.cc -│ │ ├── Benchmark_dense.cc -│ │ ├── Benchmark_nano.cc +│ │ ├── Benchmark_dense.cu +│ │ ├── Benchmark_nano.cu │ │ ├── Camera.h │ │ ├── CMakeLists.txt │ │ ├── DenseGrid.h -│ │ └── Image.h +│ │ ├── Image.h +│ │ ├── TestBenchmark.cc +│ │ └── TestBenchmark.cu │ ├── CMakeLists.txt │ ├── ex_bump_pool_buffer │ │ └── bump_pool_buffer.cc @@ -49,13 +50,13 @@ foo@bar:~$ tree │ │ ├── nanovdb.cu │ │ └── openvdb.cc │ ├── ex_index_grid_cuda -│ │ ├── index_grid_cuda.cc -│ │ └── index_grid_cuda.cu +│ │ ├── index_grid_cuda.cu +│ │ └── index_grid_cuda_kernel.cu │ ├── ex_make_custom_nanovdb │ │ └── make_custom_nanovdb.cc │ ├── ex_make_custom_nanovdb_cuda │ │ ├── make_custom_nanovdb_cuda.cc -│ │ └── make_custom_nanovdb_cuda.cu +│ │ └── make_custom_nanovdb_cuda_kernel.cu │ ├── ex_make_funny_nanovdb │ │ └── make_funny_nanovdb.cc │ ├── ex_make_nanovdb_sphere @@ -68,14 +69,14 @@ foo@bar:~$ tree │ │ └── modify_nanovdb_thrust.cu │ ├── ex_nodemanager_cuda │ │ ├── nodemanager_cuda.cc -│ │ └── 
nodemanager_cuda.cu +│ │ └── nodemanager_cuda_kernel.cu │ ├── ex_openvdb_to_nanovdb │ │ └── openvdb_to_nanovdb.cc │ ├── ex_openvdb_to_nanovdb_accessor │ │ └── openvdb_to_nanovdb_accessor.cc │ ├── ex_openvdb_to_nanovdb_cuda │ │ ├── openvdb_to_nanovdb_cuda.cc -│ │ └── openvdb_to_nanovdb_cuda.cu +│ │ └── openvdb_to_nanovdb_cuda_kernel.cu │ ├── ex_raytrace_fog_volume │ │ ├── common.h │ │ ├── main.cc @@ -91,8 +92,8 @@ foo@bar:~$ tree │ ├── ex_read_nanovdb_sphere_accessor │ │ └── read_nanovdb_sphere_accessor.cc │ ├── ex_read_nanovdb_sphere_accessor_cuda -│ │ ├── read_nanovdb_sphere_accessor_cuda.cc -│ │ └── read_nanovdb_sphere_accessor_cuda.cu +│ │ ├── read_nanovdb_sphere_accessor_cuda.cu +│ │ └── read_nanovdb_sphere_accessor_cuda_kernel.cu │ ├── ex_util │ │ └── ComputePrimitives.h │ ├── ex_voxels_to_grid_cuda @@ -116,13 +117,14 @@ foo@bar:~$ tree ├── CreateNanoGrid.h ├── CSampleFromVoxels.h ├── cuda - │ ├── CudaAddBlindData.h + │ ├── CudaAddBlindData.cuh │ ├── CudaDeviceBuffer.h - │ ├── CudaIndexToGrid.h - │ ├── CudaPointsToGrid.h - │ ├── CudaSignedFloodFill.h + │ ├── CudaGridHandle.cuh + │ ├── CudaIndexToGrid.cuh + │ ├── CudaPointsToGrid.cuh + │ ├── CudaSignedFloodFill.cuh │ ├── CudaUtils.h - │ └── GpuTimer.h + │ └── GpuTimer.cuh ├── DitherLUT.h ├── ForEach.h ├── GridBuilder.h diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index 18efe44f17..f0018fd2c0 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -170,13 +170,14 @@ set(NANOVDB_INCLUDE_UTILFILES util/CpuTimer.h util/CreateNanoGrid.h util/CSampleFromVoxels.h - util/cuda/CudaAddBlindData.h + util/cuda/CudaAddBlindData.cuh util/cuda/CudaDeviceBuffer.h - util/cuda/CudaIndexToGrid.h - util/cuda/CudaPointsToGrid.h - util/cuda/CudaSignedFloodFill.h + util/cuda/CudaGridHandle.cuh + util/cuda/CudaIndexToGrid.cuh + util/cuda/CudaPointsToGrid.cuh + util/cuda/CudaSignedFloodFill.cuh util/cuda/CudaUtils.h - util/cuda/GpuTimer.h + util/cuda/GpuTimer.cuh 
util/DitherLUT.h util/ForEach.h util/GridBuilder.h diff --git a/pendingchanges/nanovdb.txt b/pendingchanges/nanovdb.txt new file mode 100644 index 0000000000..37c31452c1 --- /dev/null +++ b/pendingchanges/nanovdb.txt @@ -0,0 +1,25 @@ + + +NanoVDB: + Minor version changed from 4 to 5 (major version is unchanged since the ABI is preserved) + Transition from C++11 to C++17 in NanoVDB.h and its tools + Several new ways to construct and modify NanoVDB grids on the GPU + New device function to convert points into a compact grid: nanovdb::cudaPointsToGrid + Improved and renamed device function that converts voxels into a grid: nanovdb::cudaVoxelsToGrid + Introduced a new extendable API for acceleration of custom random-access methods, e.g. getValue(ijk) + Index grids in 4 flavors (Index, OnIndex, IndexMask, and OnIndexMask) + Introduced new (dummy) build-type nanovdb::Point and nanovdb::GridType::PointIndex + Introduced new types nanovdb::GridType::Vec3u16 and nanovdb::GridType::Vec3u8 used for compressed representations of point coordinates as blind data + CreateNanoGrid.h is replacing GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h + Moved CudaDeviceBuffer.h to cuda/CudaDeviceBuffer.h + Added cuda/CudaUtils.h and cuda/GpuTimer.cuh with cuda utility functions + Added cuda/CudaPointsToGrid.cuh that constructs device grids from points or voxels + Added cuda/CudaIndexToGrid.cuh that converts IndexGrids and values into regular Grids + Added cuda/CudaSignedFloodFill.cuh that performs signed flood filling on SDFs on the GPU + The move constructor in GridHandle now requires the GridBuffer to actually contain a valid grid + Added types: Vec4f, Vec4d, ValueIndex, ValueOnIndex, ValueIndexMask, ValueOnIndexMask + Major improvements to GridBuilder.h, which allows users to construct grids with random access on the host + Numerous improvements in NanoVDB.h: e.g. 
Customizable get/set methods on ValueAccessor, BitFlags, transform(Map), expandAtomic(BBox), expandAtomic(Coord), intersectAtomic(BBox), pi(), BuildTraits, more documentation, Mask:: DenseIterator, Mask:: setOnAtomic,Mask:: setOffAtomic, Map constructors, DataType are now public vs private in all node types, GridMetaData can now be copied + PNanoVDB.h is now in sync with NanoVDB.h + Added PrefixSum.h for concurrent computation of prefix sum on the host + Primitives.h can now create grids on the CPU with SDF, FOG and point of torus From 4b6cdc86e741fc4a03bd1e69a403c32affb06e62 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 10:09:30 -0700 Subject: [PATCH 11/49] removed tabs Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/PrefixSum.h | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nanovdb/nanovdb/util/PrefixSum.h b/nanovdb/nanovdb/util/PrefixSum.h index b08ee11d43..87775c2d2a 100644 --- a/nanovdb/nanovdb/util/PrefixSum.h +++ b/nanovdb/nanovdb/util/PrefixSum.h @@ -10,9 +10,9 @@ \brief Multi-threaded implementations of inclusive prefix sum - \note An exclusive prefix sum is simply an array starting with zero - followed by the elements in the inclusive prefix sum, minus its - last entry which is the sum of all the input elements. + \note An exclusive prefix sum is simply an array starting with zero + followed by the elements in the inclusive prefix sum, minus its + last entry which is the sum of all the input elements. 
*/ #ifndef NANOVDB_PREFIX_SUM_H_HAS_BEEN_INCLUDED @@ -44,27 +44,27 @@ template void inclusiveScan(T *array, size_t size, const T &identity, bool threaded, Op op) { #ifndef NANOVDB_USE_TBB - threaded = false; - (void)identity;// avoids compiler warning + threaded = false; + (void)identity;// avoids compiler warning #endif if (threaded) { #ifdef NANOVDB_USE_TBB - using RangeT = tbb::blocked_range; - tbb::parallel_scan(RangeT(0, size), identity, - [&](const RangeT &r, T sum, bool is_final_scan)->T { - T tmp = sum; - for (size_t i = r.begin(); i < r.end(); ++i) { - tmp = op(tmp, array[i]); - if (is_final_scan) array[i] = tmp; - } - return tmp; - },[&](const T &a, const T &b) {return op(a, b);} - ); + using RangeT = tbb::blocked_range; + tbb::parallel_scan(RangeT(0, size), identity, + [&](const RangeT &r, T sum, bool is_final_scan)->T { + T tmp = sum; + for (size_t i = r.begin(); i < r.end(); ++i) { + tmp = op(tmp, array[i]); + if (is_final_scan) array[i] = tmp; + } + return tmp; + },[&](const T &a, const T &b) {return op(a, b);} + ); #endif - } else { // serial inclusive prefix operation - for (size_t i=1; i From 7764a852327bf1fd3c5ba38b3c5be69329dbca8f Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 10:13:44 -0700 Subject: [PATCH 12/49] removed tabs Signed-off-by: Ken Museth --- .../examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc index f883b3aded..314fe4ea57 100644 --- a/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc +++ b/nanovdb/nanovdb/examples/ex_write_nanovdb_grids/write_nanovdb_grids.cc @@ -10,7 +10,7 @@ int main() { try { - std::vector> handles; + std::vector> handles; // Create multiple NanoVDB grids of various types handles.push_back(nanovdb::createLevelSetSphere(100.0f)); 
handles.push_back(nanovdb::createLevelSetTorus(100.0f, 50.0f)); @@ -31,4 +31,4 @@ int main() std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; } return 0; -} \ No newline at end of file +} From 4ba3f1d70745c211340ebfc271c02269cc4f6833 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 10:34:57 -0700 Subject: [PATCH 13/49] fixed build issue Signed-off-by: Ken Museth --- .../{index_grid_cuda.cu => index_grid_cuda.cc} | 0 .../examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename nanovdb/nanovdb/examples/ex_index_grid_cuda/{index_grid_cuda.cu => index_grid_cuda.cc} (100%) diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc similarity index 100% rename from nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cu rename to nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda.cc diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu index be83ceb074..3a78c94093 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 #include // this defined the core tree data structure of NanoVDB accessable on both the host and device +#include // required since GridHandle has device code #include // for printf // This is called by the host only @@ -38,4 +39,4 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid* g gpu_kernel<<<1, 1, 0, stream>>>(gpuGrid); // Launch the device kernel asynchronously cpu_kernel(cpuGrid); // Launch the host "kernel" (synchronously) -} \ No newline at end of file +} From 046f663f6b37ea597a49c8ebef2e504c04653508 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 13:40:09 -0700 Subject: [PATCH 
14/49] build fix Signed-off-by: Ken Museth --- .../examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu index 6b98939768..22008ab27d 100644 --- a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu +++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu @@ -9,6 +9,7 @@ #include #include +#include void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale) { From 0555dd498202821cf4337b911dd3ccf2ff5628dd Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 4 Aug 2023 14:32:09 -0700 Subject: [PATCH 15/49] fixing thrust example Signed-off-by: Ken Museth --- .../modify_nanovdb_thrust.cc | 43 +++++++++++++++++++ .../modify_nanovdb_thrust.cu | 36 +--------------- 2 files changed, 44 insertions(+), 35 deletions(-) create mode 100644 nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc new file mode 100644 index 0000000000..dbda5b3d73 --- /dev/null +++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cc @@ -0,0 +1,43 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @brief This example demonstrates how values in a NanoVDB grid can be +/// modified on the device. It depends on NanoVDB and CUDA thrust. + +#include +#include + +extern "C" void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale); + +int main() +{ + try { + // Create a NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1.
+ auto handle = nanovdb::createLevelSetSphere(100.0f); + using GridT = nanovdb::FloatGrid; + + handle.deviceUpload(0, false); // Copy the NanoVDB grid to the GPU asynchronously + + const GridT* grid = handle.grid(); // get a (raw) const pointer to a NanoVDB grid of value type float on the CPU + GridT* deviceGrid = handle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU + + if (!deviceGrid || !grid) { + throw std::runtime_error("GridHandle did not contain a grid with value type float"); + } + if (!grid->isSequential<0>()) { + throw std::runtime_error("Grid does not support sequential access to leaf nodes!"); + } + + std::cout << "Value before scaling = " << grid->tree().getValue(nanovdb::Coord(101,0,0)) << std::endl; + + scaleActiveVoxels(deviceGrid, grid->tree().nodeCount(0), 2.0f); + + handle.deviceDownload(0, true); // Copy the NanoVDB grid to the CPU synchronously + + std::cout << "Value after scaling = " << grid->tree().getValue(nanovdb::Coord(101,0,0)) << std::endl; + } + catch (const std::exception& e) { + std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu index 22008ab27d..1078b8aa1b 100644 --- a/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu +++ b/nanovdb/nanovdb/examples/ex_modify_nanovdb_thrust/modify_nanovdb_thrust.cu @@ -7,11 +7,10 @@ #include #include -#include #include #include -void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale) +extern "C" void scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float scale) { auto kernel = [grid_d, scale] __device__ (const uint64_t n) { auto *leaf_d = grid_d->tree().getFirstNode<0>() + (n >> 9);// this only works if grid->isSequential<0>() == true @@ -24,37 +23,4 @@ void 
scaleActiveVoxels(nanovdb::FloatGrid *grid_d, uint64_t leafCount, float sca thrust::counting_iterator iter(0); thrust::for_each(iter, iter + 512*leafCount, kernel); -} - -int main() -{ - try { - // Create an NanoVDB grid of a sphere at the origin with radius 100 and voxel size 1. - auto handle = nanovdb::createLevelSetSphere(100.0f); - using GridT = nanovdb::FloatGrid; - - handle.deviceUpload(0, false); // Copy the NanoVDB grid to the GPU asynchronously - - const GridT* grid = handle.grid(); // get a (raw) const pointer to a NanoVDB grid of value type float on the CPU - GridT* deviceGrid = handle.deviceGrid(); // get a (raw) pointer to a NanoVDB grid of value type float on the GPU - - if (!deviceGrid || !grid) { - throw std::runtime_error("GridHandle did not contain a grid with value type float"); - } - if (!grid->isSequential<0>()) { - throw std::runtime_error("Grid does not support sequential access to leaf nodes!"); - } - - std::cout << "Value before scaling = " << grid->tree().getValue(nanovdb::Coord(101,0,0)) << std::endl; - - scaleActiveVoxels(deviceGrid, grid->tree().nodeCount(0), 2.0f); - - handle.deviceDownload(0, true); // Copy the NanoVDB grid to the CPU synchronously - - std::cout << "Value after scaling = " << grid->tree().getValue(nanovdb::Coord(101,0,0)) << std::endl; - } - catch (const std::exception& e) { - std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl; - } - return 0; } \ No newline at end of file From 2a883ed82fbaddd54099a506e0350c589a20a691 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 24 Aug 2023 09:08:08 -0700 Subject: [PATCH 16/49] sync with private repo Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 28 +++++++++++++++++++--- nanovdb/nanovdb/PNanoVDB.h | 32 ++++++++++++++----------- openvdb/openvdb/points/AttributeArray.h | 2 +- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index d6f8b9400d..1a56447b62 100644 --- 
a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -879,7 +879,7 @@ __hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) return gridClass < GridClass::End && gridType < GridType::End; // any valid combination } -// --------------------------> isValue(GridType, GridClass) <------------------------------------ +// --------------------------> validation of blind data meta data <------------------------------------ /// @brief return true if the combination of GridBlindDataClass, GridBlindDataSemantic and GridType is valid. __hostdev__ inline bool isValid(const GridBlindDataClass& blindClass, @@ -3152,7 +3152,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less! int64_t mDataOffset; // byte offset to the blind data, relative to this GridBlindMetaData. uint64_t mValueCount; // number of blind values, e.g. point count - uint32_t mValueSize;// byte size of each value, e.g. 4 if mDataType=Float and 1 if mDataType=Unknown + uint32_t mValueSize;// byte size of each value, e.g. 4 if mDataType=Float and 1 if mDataType=Unknown since that amounts to char GridBlindDataSemantic mSemantic; // semantic meaning of the data. GridBlindDataClass mDataClass; // 4 bytes GridType mDataType; // 4 bytes @@ -3182,7 +3182,29 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData } /// @brief return true if this meta data has a valid combination of semantic, class and value tags - __hostdev__ bool isValid() const { return nanovdb::isValid(mDataClass, mSemantic, mDataType); } + __hostdev__ bool isValid() const + { + auto check = [&]()->bool{ + switch (mDataType){ + case GridType::Unknown: return mValueSize==1u;// i.e. 
we encode data as mValueCount chars + case GridType::Float: return mValueSize==4u; + case GridType::Double: return mValueSize==8u; + case GridType::Int16: return mValueSize==2u; + case GridType::Int32: return mValueSize==4u; + case GridType::Int64: return mValueSize==8u; + case GridType::Vec3f: return mValueSize==12u; + case GridType::Vec3d: return mValueSize==24u; + case GridType::RGBA8: return mValueSize==4u; + case GridType::Fp8: return mValueSize==1u; + case GridType::Fp16: return mValueSize==2u; + case GridType::Vec4f: return mValueSize==16u; + case GridType::Vec4d: return mValueSize==32u; + case GridType::Vec3u8: return mValueSize==3u; + case GridType::Vec3u16: return mValueSize==6u; + default: return true;}// all other combinations are valid + }; + return nanovdb::isValid(mDataClass, mSemantic, mDataType) && check(); + } /// @brief return size in bytes of the blind data represented by this blind meta data /// @note This size includes possible padding for 32 byte alignment. The actual amount diff --git a/nanovdb/nanovdb/PNanoVDB.h b/nanovdb/nanovdb/PNanoVDB.h index 44e5ff1da4..f32b7e0ac1 100644 --- a/nanovdb/nanovdb/PNanoVDB.h +++ b/nanovdb/nanovdb/PNanoVDB.h @@ -56,8 +56,10 @@ #endif #ifdef PNANOVDB_CMATH +#ifndef __CUDACC_RTC__ #include #endif +#endif // ------------------------------------------------ Buffer ----------------------------------------------------------- @@ -72,9 +74,11 @@ #endif #if defined(PNANOVDB_BUF_C) +#ifndef __CUDACC_RTC__ #include +#endif #if defined(__CUDACC__) -#define PNANOVDB_BUF_FORCE_INLINE static inline __host__ __device__ __forceinline__ +#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__ #elif defined(_WIN32) #define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline #else @@ -272,7 +276,7 @@ void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value // force inline #if defined(PNANOVDB_C) #if defined(__CUDACC__) -#define PNANOVDB_FORCE_INLINE static inline __host__ 
__device__ __forceinline__ +#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ #elif defined(_WIN32) #define PNANOVDB_FORCE_INLINE static inline __forceinline #else @@ -287,11 +291,7 @@ void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value // struct typedef, static const, inout #if defined(PNANOVDB_C) #define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; -#if defined(__CUDACC__) -#define PNANOVDB_STATIC_CONST static const __host__ __device__ -#else #define PNANOVDB_STATIC_CONST static const -#endif #define PNANOVDB_INOUT(X) X* #define PNANOVDB_IN(X) const X* #define PNANOVDB_DEREF(X) (*X) @@ -315,9 +315,13 @@ void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value // basic types, type conversion #if defined(PNANOVDB_C) #define PNANOVDB_NATIVE_64 +#ifndef __CUDACC_RTC__ #include +#endif #if !defined(PNANOVDB_MEMCPY_CUSTOM) +#ifndef __CUDACC_RTC__ #include +#endif #define pnanovdb_memcpy memcpy #endif typedef uint32_t pnanovdb_uint32_t; @@ -2081,12 +2085,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) { - return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), (i == 0u ? 
0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) { - return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) @@ -2117,7 +2121,7 @@ PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, p pnanovdb_write_uint32(buf, addr, val_mask); } PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) -{ +{ pnanovdb_leaf_pointindex_set_on(buf, leaf, i); pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); } @@ -2432,11 +2436,11 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pn } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( - pnanovdb_buf_t buf, - pnanovdb_address_t value_address, - PNANOVDB_IN(pnanovdb_coord_t) ijk, - pnanovdb_uint32_t level, - PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, + pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, PNANOVDB_INOUT(pnanovdb_uint64_t)range_end ) { diff --git a/openvdb/openvdb/points/AttributeArray.h b/openvdb/openvdb/points/AttributeArray.h index d00a77b362..683e69873e 100644 --- a/openvdb/openvdb/points/AttributeArray.h +++ b/openvdb/openvdb/points/AttributeArray.h @@ -1906,7 +1906,7 @@ TypedAttributeArray::writeMetadata(std::ostream& os, bool ou 
os.write(reinterpret_cast(&size), sizeof(Index)); // write strided - if (!strideOfOne) os.write(reinterpret_cast(&strideOrTotalSize), sizeof(Index)); + if (!strideOfOne) os.write(reinterpret_cast(&strideOrTotalSize), sizeof(Index)); } From c320c63b11964e57f1e902e837249bbc4efd7da8 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 24 Aug 2023 10:26:59 -0700 Subject: [PATCH 17/49] fixed warning Signed-off-by: Ken Museth --- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index d0cb5a5cb6..a17417beef 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -5798,12 +5798,12 @@ TEST_F(TestNanoVDB, StencilIntersection) EXPECT_TRUE(mask.none() == (count == 0)); EXPECT_TRUE(mask.any() == (count > 0)); EXPECT_EQ(count, mask.count()); - EXPECT_TRUE(mask.test(0) == mx); - EXPECT_TRUE(mask.test(1) == px); - EXPECT_TRUE(mask.test(2) == my); - EXPECT_TRUE(mask.test(3) == py); - EXPECT_TRUE(mask.test(4) == mz); - EXPECT_TRUE(mask.test(5) == pz); + EXPECT_TRUE(mask.test(0) == (mx > 0)); + EXPECT_TRUE(mask.test(1) == (px > 0)); + EXPECT_TRUE(mask.test(2) == (my > 0)); + EXPECT_TRUE(mask.test(3) == (py > 0)); + EXPECT_TRUE(mask.test(4) == (mz > 0)); + EXPECT_TRUE(mask.test(5) == (pz > 0)); }//pz }//mz }//py From 9cc8778423d65934ba27d75c4646e606f79c4d2c Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 24 Aug 2023 11:10:38 -0700 Subject: [PATCH 18/49] fixing build on Windows Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 058ef5cc58..ab0eeaae3d 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -409,13 +409,13 @@ 
void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t auto *d_indx = mMemPool.template alloc(pointCount); if (mVerbose==2) mTimer.restart("Generate tile keys"); - if constexpr(is_same::value) {// points in world space - if constexpr(is_same::value) { + if (is_same::value) {// points in world space + if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); }, mDeviceData); cudaCheckError(); - } else if constexpr(is_same::value) { + } else if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); @@ -423,12 +423,12 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t } else { throw std::runtime_error("Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); } - } else if constexpr(is_same::value) { + } else if (is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid]); }, mDeviceData); cudaCheckError(); - } else if constexpr(is_same::value || is_same::value) { + } else if (is_same::value || is_same::value) { cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { d_indx[tid] = uint32_t(tid); d_keys[tid] = NanoRoot::CoordToKey(d_points[tid].round()); From 60dabbf12e561688170ca963c573af5e79ae7934 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 24 Aug 2023 11:34:01 -0700 Subject: [PATCH 19/49] fixed typo Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 
ab0eeaae3d..b45387f371 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -507,7 +507,7 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); cudaCheck(cudaMemcpy(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost)); //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); - if (mMaxPointsPerLeaf > std::numeric_limits::max()) { + if (mMaxPointsPerLeaf > std::numeric_limits::max()) { throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); } mMemPool.free(d_maxPointsPerLeaf); From 04cc6be325dabae208d06e661098c5e2a2d3e42d Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 31 Aug 2023 10:16:34 -0700 Subject: [PATCH 20/49] cleanup of CudaPointsToGrid and improvements to is_same Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 36 +++++++------- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 23 +++++++++ nanovdb/nanovdb/util/GridHandle.h | 3 ++ nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h | 3 ++ .../nanovdb/util/cuda/CudaPointsToGrid.cuh | 48 ++++++++----------- 5 files changed, 66 insertions(+), 47 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 1a56447b62..6c5aad2a3c 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -426,8 +426,15 @@ enum class GridBlindDataSemantic : uint32_t { Unknown = 0, // --------------------------> is_same <------------------------------------ /// @brief C++11 implementation of std::is_same -template +/// @note When more than two arguments are provided value = T0==T1 || T0==T2 || ... 
+template struct is_same +{ + static constexpr bool value = is_same::value || is_same::value; +}; + +template +struct is_same { static constexpr bool value = false; }; @@ -444,7 +451,7 @@ struct is_same template struct is_floating_point { - static constexpr bool value = is_same::value || is_same::value; + static constexpr bool value = is_same::value; }; // --------------------------> BuildTraits <------------------------------------ @@ -454,29 +461,18 @@ template struct BuildTraits { // check if T is an index type - static constexpr bool is_index = is_same::value || - is_same::value || - is_same::value || - is_same::value; - static constexpr bool is_onindex = is_same::value || - is_same::value; - static constexpr bool is_offindex = is_same::value || - is_same::value; - static constexpr bool is_indexmask = is_same::value || - is_same::value; + static constexpr bool is_index = is_same::value; + static constexpr bool is_onindex = is_same::value; + static constexpr bool is_offindex = is_same::value; + static constexpr bool is_indexmask = is_same::value; // check if T is a compressed float type with fixed bit precision - static constexpr bool is_FpX = is_same::value || - is_same::value || - is_same::value; + static constexpr bool is_FpX = is_same::value; // check if T is a compressed float type with fixed or variable bit precision - static constexpr bool is_Fp = is_FpX || is_same::value; + static constexpr bool is_Fp = is_same::value; // check if T is a POD float type, i.e float or double static constexpr bool is_float = is_floating_point::value; // check if T is a template specialization of LeafData, i.e. 
has T mValues[512] - static constexpr bool is_special = is_index || is_Fp || - is_same::value || - is_same::value || - is_same::value; + static constexpr bool is_special = is_index || is_Fp || is_same::value; }; // BuildTraits // --------------------------> enable_if <------------------------------------ diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu index 32ca28c211..3cce7a2537 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cu +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -2178,3 +2178,26 @@ TEST(TestNanoVDBCUDA, cudaAddBlindData) EXPECT_TRUE(dataPtr2); for (size_t i=0; i(100); + { + auto *floatGrid = cudaHandle.grid(); + EXPECT_TRUE(floatGrid); + auto acc = floatGrid->getAccessor(); + EXPECT_EQ( 3.0f, acc(103,0,0)); + EXPECT_EQ( 0.0f, acc(100,0,0)); + EXPECT_EQ(-3.0f, acc( 97,0,0)); + } + auto hostHandle = cudaHandle.copy(); + EXPECT_TRUE(cudaHandle.grid());// should be unchanged + { + auto *floatGrid = hostHandle.grid(); + EXPECT_TRUE(floatGrid); + auto acc = floatGrid->getAccessor(); + EXPECT_EQ( 3.0f, acc(103,0,0)); + EXPECT_EQ( 0.0f, acc(100,0,0)); + EXPECT_EQ(-3.0f, acc( 97,0,0)); + } +} diff --git a/nanovdb/nanovdb/util/GridHandle.h b/nanovdb/nanovdb/util/GridHandle.h index bce616c89a..20dde83535 100644 --- a/nanovdb/nanovdb/util/GridHandle.h +++ b/nanovdb/nanovdb/util/GridHandle.h @@ -112,8 +112,11 @@ class GridHandle /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle. uint64_t size() const { return mBuffer.size(); } + //@{ /// @brief Return true if this handle is empty, i.e. 
has no allocated memory bool empty() const { return this->size() == 0; } + bool isEmpty() const { return this->size() == 0; } + //@} /// @brief Return true if this handle contains any grids operator bool() const { return !this->empty(); } diff --git a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h index dc96ed0993..7674f84a12 100644 --- a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h +++ b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h @@ -115,8 +115,11 @@ class CudaDeviceBuffer /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. uint64_t size() const { return mSize; } + //@{ /// @brief Returns true if this allocator is empty, i.e. has no allocated memory bool empty() const { return mSize == 0; } + bool isEmpty() const { return mSize == 0; } + //@} /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL void clear(); diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index b45387f371..ad3524775c 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -400,6 +400,12 @@ template template void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t pointCount) { + if constexpr(is_same::value) { + static_assert(is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + } else { + static_assert(is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); + } + mData.d_keys = mMemPool.template alloc(pointCount); mData.d_indx = mMemPool.template alloc(pointCount);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB cudaCheck(cudaMemcpy(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice));// copy mData from CPU -> GPU @@ -409,34 +415,22 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t auto *d_indx = mMemPool.template alloc(pointCount); if 
(mVerbose==2) mTimer.restart("Generate tile keys"); - if (is_same::value) {// points in world space - if (is_same::value) { - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { - d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); - }, mDeviceData); cudaCheckError(); - } else if (is_same::value) { - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { - d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); - }, mDeviceData); cudaCheckError(); - } else { - throw std::runtime_error("Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Vec3T *d_points, const Data *d_data) { + d_indx[tid] = uint32_t(tid); + uint64_t &key = d_keys[tid]; + if constexpr(is_same::value) {// points are in world space + if constexpr(is_same::value) { + key = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); + } else {// points are Vec3d + key = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); + } + } else if constexpr(is_same::value) {// points Coord are in index space + key = NanoRoot::CoordToKey(d_points[tid]); + } else {// points are Vec3f or Vec3d in index space + key = NanoRoot::CoordToKey(d_points[tid].round()); } - } else if (is_same::value) { - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { - d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_points[tid]); - }, mDeviceData); cudaCheckError(); - } else if (is_same::value || is_same::value) { - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data) { - d_indx[tid] = uint32_t(tid); - d_keys[tid] = NanoRoot::CoordToKey(d_points[tid].round()); - }, mDeviceData); cudaCheckError(); - } else { - throw std::runtime_error("Voxel coordinates 
should be represented as Coord, Vec3f or Vec3d"); - } - + }, d_points, mDeviceData); + cudaCheckError(); if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx From df574a8d2194be4dba91725bdb4c13f9a547f311 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 5 Sep 2023 12:43:42 -0700 Subject: [PATCH 21/49] fixed issues in IO.h and CreateNanoGrid.h Signed-off-by: Ken Museth --- nanovdb/nanovdb/unittest/TestOpenVDB.cc | 8 ++++---- nanovdb/nanovdb/util/CreateNanoGrid.h | 21 ++++++++------------- nanovdb/nanovdb/util/IO.h | 8 ++++++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/nanovdb/nanovdb/unittest/TestOpenVDB.cc b/nanovdb/nanovdb/unittest/TestOpenVDB.cc index aa93dd5767..a199a64fe6 100644 --- a/nanovdb/nanovdb/unittest/TestOpenVDB.cc +++ b/nanovdb/nanovdb/unittest/TestOpenVDB.cc @@ -2134,8 +2134,8 @@ TEST_F(TestOpenVDB, LevelSetFiles) // test reading from non-existing file EXPECT_THROW(nanovdb::io::readGrid("data/ls.vdb", getGridName(foundModels[0])), std::runtime_error); - // test reading non-existing grid from an existing file - EXPECT_FALSE(nanovdb::io::readGrid("data/ls.nvdb", "bunny")); + // test reading of non-existing grid from an existing file + EXPECT_THROW(nanovdb::io::readGrid("data/ls.nvdb", "bunny"), std::runtime_error); // test reading existing grid from an existing file { @@ -2204,8 +2204,8 @@ TEST_F(TestOpenVDB, FogFiles) // test reading from non-existing file EXPECT_THROW(nanovdb::io::readGrid("data/fog.vdb", getGridName(foundModels[0])), std::runtime_error); - // test reading non-existing grid from an existing file - EXPECT_FALSE(nanovdb::io::readGrid("data/fog.nvdb", "bunny")); + // test reading of non-existing grid from an existing file + EXPECT_THROW(nanovdb::io::readGrid("data/fog.nvdb", 
"bunny"), std::runtime_error); // test reading existing grid from an existing file { diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h index d2823ad2a7..2500d80722 100644 --- a/nanovdb/nanovdb/util/CreateNanoGrid.h +++ b/nanovdb/nanovdb/util/CreateNanoGrid.h @@ -72,8 +72,8 @@ \note This files replaces GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h */ -#ifndef NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED -#define NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED +#ifndef NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED +#define NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED #if defined(NANOVDB_USE_OPENVDB) #include @@ -813,8 +813,8 @@ CreateNanoGrid::CreateNanoGrid(const SrcNodeAccT &srcNodeAcc) template struct CreateNanoGrid::BlindMetaData { - BlindMetaData(const std::string& name, - const std::string& type, + BlindMetaData(const std::string& name,// name + used to derive GridBlindDataSemantic + const std::string& type,// used to derive GridType of blind data GridBlindDataClass dataClass, size_t i, size_t valueCount, size_t valueSize) : metaData(reinterpret_cast(new char[sizeof(GridBlindMetaData)])) @@ -831,7 +831,7 @@ struct CreateNanoGrid::BlindMetaData metaData->mValueSize = valueSize; NANOVDB_ASSERT(metaData->isValid()); } - BlindMetaData(const std::string& name, + BlindMetaData(const std::string& name,// only name GridBlindDataSemantic dataSemantic, GridBlindDataClass dataClass, GridType dataType, @@ -881,8 +881,6 @@ struct CreateNanoGrid::BlindMetaData semantic = GridBlindDataSemantic::PointNormal; } else if ("id" == name) { semantic = GridBlindDataSemantic::PointId; - //} else { - //std::cerr << "CreateNanoGrid::mapToSemantics: Unable to map \n" << name << "\" to GridBlindDataSemantic\n"; } return semantic; } @@ -992,9 +990,9 @@ CreateNanoGrid::preProcess() for (auto it = nameMap.begin(); it != nameMap.end(); ++it) { const size_t index = it->second; auto& attArray = srcLeaf.constAttributeArray(index); - 
mBlindMetaData.emplace(it->first, // name + mBlindMetaData.emplace(it->first, // name used to derive semantics descriptor.valueType(index), // type - GridBlindDataClass::AttributeArray, // class + it->first == "id" ? GridBlindDataClass::IndexArray : GridBlindDataClass::AttributeArray, // class index, // order pointCount, // element count attArray.valueTypeSize()); // element size @@ -1008,7 +1006,6 @@ CreateNanoGrid::preProcess() GridBlindDataClass::GridName, GridType::Unknown, mSrcNodeAcc.getName().length() + 1, 1); - //1, mSrcNodeAcc.getName().length() + 1); } mLeafNodeSize = mSrcNodeAcc.nodeCount(0)*NanoLeaf::DataType::memUsage(); }// CreateNanoGrid::preProcess @@ -1097,7 +1094,6 @@ CreateNanoGrid::preProcess(OracleT oracle) GridBlindDataClass::GridName, GridType::Unknown, mSrcNodeAcc.getName().length() + 1, 1); - //1, mSrcNodeAcc.getName().length() + 1); } }// CreateNanoGrid::preProcess @@ -1186,7 +1182,6 @@ CreateNanoGrid::preProcess(uint32_t channels) GridBlindDataClass::GridName, GridType::Unknown, mSrcNodeAcc.getName().length() + 1, 1); - //1, mSrcNodeAcc.getName().length() + 1); } }// preProcess @@ -2075,4 +2070,4 @@ openToNanoVDB(const openvdb::GridBase::Ptr& base, } // namespace nanovdb -#endif // NANOVDB_CREATENANOGRID_H_HAS_BEEN_INCLUDED +#endif // NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/IO.h b/nanovdb/nanovdb/util/IO.h index f89183d84b..f304411e7c 100644 --- a/nanovdb/nanovdb/util/IO.h +++ b/nanovdb/nanovdb/util/IO.h @@ -438,8 +438,12 @@ inline void Segment::add(const GridHandle& h) meta.emplace_back(h.gridSize(i), header.codec, *grid); } else if (auto* grid = h.template grid(i)) { meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + meta.emplace_back(h.gridSize(i), header.codec, *grid); } else if (auto* grid = h.template grid(i)) { meta.emplace_back(h.gridSize(i), header.codec, *grid); + } else if (auto* grid = h.template grid(i)) { + 
meta.emplace_back(h.gridSize(i), header.codec, *grid); } else if (auto* grid = h.template grid(i)) { meta.emplace_back(h.gridSize(i), header.codec, *grid); } else if (auto* grid = h.template grid(i)) { @@ -591,7 +595,7 @@ GridHandle readGrid(std::istream& is, uint64_t n, const BufferT& pool) } is.seekg(seek, std::ios_base::cur); // skip forward from the current position } - throw std::runtime_error("Grid index exceeds grid count in file"); + throw std::runtime_error("Grid index " + std::to_string(n) + "exceeds grid count (" + std::to_string(counter) + ") in file"); }// readGrid /// @brief Read the first grid with a specific name @@ -632,7 +636,7 @@ GridHandle readGrid(std::istream& is, const std::string& gridName, cons } is.seekg(seek, std::ios_base::cur); // skip forward from the current position } - return GridHandle(); // empty handle + throw std::runtime_error("Grid name '" + gridName + "' not found in file"); }// readGrid /// @brief Read all the grids From 433aa356be8130b7464e63b46a0cf839897df67a Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 5 Sep 2023 14:01:48 -0700 Subject: [PATCH 22/49] fixing nvcc/tbb build issue Signed-off-by: Ken Museth --- .../nanovdb/examples/benchmark/DenseGrid.h | 4 ++++ nanovdb/nanovdb/examples/benchmark/Image.h | 24 +++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h b/nanovdb/nanovdb/examples/benchmark/DenseGrid.h index 9666f8a260..de188636be 100644 --- a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h +++ b/nanovdb/nanovdb/examples/benchmark/DenseGrid.h @@ -229,6 +229,9 @@ DenseGrid::create(Coord min, grid->mIndexBBox[0][i] = min[i]; grid->mIndexBBox[1][i] = max[i] - 1; } +#if 1 + grid->mWorldBBox = grid->mIndexBBox.transform(grid->mMap); +#else grid->mWorldBBox[0] = grid->mWorldBBox[1] = grid->mMap.applyMap(Vec3d(min[0], min[1], min[2])); grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], min[1], max[2]))); 
grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], max[1], min[2]))); @@ -237,6 +240,7 @@ DenseGrid::create(Coord min, grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], min[1], max[2]))); grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], max[1], max[2]))); grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], max[1], max[2]))); +#endif grid->mVoxelSize = grid->mMap.applyMap(Vec3d(1)) - grid->mMap.applyMap(Vec3d(0)); if (gridClass == GridClass::LevelSet && !is_floating_point::value) throw std::runtime_error("Level sets are expected to be floating point types"); diff --git a/nanovdb/nanovdb/examples/benchmark/Image.h b/nanovdb/nanovdb/examples/benchmark/Image.h index 4f427fdf6e..0461a21953 100644 --- a/nanovdb/nanovdb/examples/benchmark/Image.h +++ b/nanovdb/nanovdb/examples/benchmark/Image.h @@ -23,10 +23,10 @@ #include -#if defined(NANOVDB_USE_TBB) -#include -#include -#endif +//#if defined(NANOVDB_USE_TBB) +//#include +//#include +//#endif namespace nanovdb { @@ -126,14 +126,14 @@ inline void Image::clear(int log2) } }; -#if defined(NANOVDB_USE_TBB) - tbb::blocked_range2d range(0, ImageData::mWidth, 0, ImageData::mHeight); - tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { - kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); - }); -#else +//#if defined(NANOVDB_USE_TBB) +// tbb::blocked_range2d range(0, ImageData::mWidth, 0, ImageData::mHeight); +// tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { +// kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); +// }); +//#else kernel2D(0, 0, ImageData::mWidth, ImageData::mHeight); -#endif +//#endif } } @@ -156,4 +156,4 @@ inline void Image::writePPM(const std::string& fileName, const std::string& comm } // namespace nanovdb -#endif // end of NANOVDB_IMAGE_H_HAS_BEEN_INCLUDED \ No newline at end of file +#endif // end of NANOVDB_IMAGE_H_HAS_BEEN_INCLUDED From 28f6361790b70c9e609702b1c68405f0a9629d33 Mon Sep 17 00:00:00 
2001 From: Ken Museth Date: Wed, 27 Sep 2023 13:53:25 -0700 Subject: [PATCH 23/49] snapshot Signed-off-by: Ken Museth --- nanovdb/nanovdb/examples/benchmark/Image.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/nanovdb/nanovdb/examples/benchmark/Image.h b/nanovdb/nanovdb/examples/benchmark/Image.h index 0461a21953..c3686769eb 100644 --- a/nanovdb/nanovdb/examples/benchmark/Image.h +++ b/nanovdb/nanovdb/examples/benchmark/Image.h @@ -24,9 +24,10 @@ #include //#if defined(NANOVDB_USE_TBB) -//#include -//#include -//#endif +#if defined(NANOVDB_USE_TBB) && !defined(__CUDACC_RTC__) +#include +#include +#endif namespace nanovdb { @@ -127,13 +128,14 @@ inline void Image::clear(int log2) }; //#if defined(NANOVDB_USE_TBB) -// tbb::blocked_range2d range(0, ImageData::mWidth, 0, ImageData::mHeight); -// tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { -// kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); -// }); -//#else +#if defined(NANOVDB_USE_TBB) && !defined(__CUDACC_RTC__) + tbb::blocked_range2d range(0, ImageData::mWidth, 0, ImageData::mHeight); + tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { + kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); + }); +#else kernel2D(0, 0, ImageData::mWidth, ImageData::mHeight); -//#endif +#endif } } From 870567288e65fca8d1f71c34f90a284752dd9087 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 28 Sep 2023 08:30:57 -0700 Subject: [PATCH 24/49] removed Benchmark from cmake - for now Signed-off-by: Ken Museth --- nanovdb/nanovdb/examples/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nanovdb/nanovdb/examples/CMakeLists.txt b/nanovdb/nanovdb/examples/CMakeLists.txt index 1ff1e8fd5e..e86c17bc32 100644 --- a/nanovdb/nanovdb/examples/CMakeLists.txt +++ b/nanovdb/nanovdb/examples/CMakeLists.txt @@ -83,9 +83,9 @@ endfunction() # 
----------------------------------------------------------------------- -if(NANOVDB_BUILD_BENCHMARK) - add_subdirectory(benchmark) -endif() +#if(NANOVDB_BUILD_BENCHMARK) +# add_subdirectory(benchmark) +#endif() nanovdb_example(NAME "ex_make_custom_nanovdb") nanovdb_example(NAME "ex_make_custom_nanovdb_cuda") From 96499dd5cc4f494e4e5fc5f37ebf347cedea5858 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 28 Sep 2023 09:55:54 -0700 Subject: [PATCH 25/49] removed examples/Benchmark (for now) Signed-off-by: Ken Museth --- nanovdb/nanovdb/CMakeLists.txt | 5 +- .../examples/benchmark/BenchKernels_dense.cu | 94 --- .../examples/benchmark/BenchKernels_nano.cu | 99 --- .../examples/benchmark/Benchmark_dense.cu | 110 ---- .../examples/benchmark/Benchmark_nano.cu | 122 ---- .../nanovdb/examples/benchmark/CMakeLists.txt | 74 --- nanovdb/nanovdb/examples/benchmark/Camera.h | 72 --- .../nanovdb/examples/benchmark/DenseGrid.h | 490 --------------- nanovdb/nanovdb/examples/benchmark/Image.h | 161 ----- .../examples/benchmark/TestBenchmark.cc | 579 ------------------ .../examples/benchmark/TestBenchmark.cu | 109 ---- 11 files changed, 3 insertions(+), 1912 deletions(-) delete mode 100644 nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu delete mode 100644 nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu delete mode 100644 nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu delete mode 100644 nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu delete mode 100644 nanovdb/nanovdb/examples/benchmark/CMakeLists.txt delete mode 100644 nanovdb/nanovdb/examples/benchmark/Camera.h delete mode 100644 nanovdb/nanovdb/examples/benchmark/DenseGrid.h delete mode 100644 nanovdb/nanovdb/examples/benchmark/Image.h delete mode 100644 nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc delete mode 100644 nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index f0018fd2c0..b4aa3f1dd2 100644 --- 
a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -25,7 +25,7 @@ message(STATUS "----------------------------------------------------") option(NANOVDB_BUILD_TOOLS "Build command-line tools" ON) option(NANOVDB_BUILD_UNITTESTS "Build Unit tests" OFF) option(NANOVDB_BUILD_EXAMPLES "Build examples" OFF) -option(NANOVDB_BUILD_BENCHMARK "Build benchmark in examples" OFF) +#option(NANOVDB_BUILD_BENCHMARK "Build benchmark in examples" OFF) option(NANOVDB_USE_INTRINSICS "Build with hardware intrinsics support" OFF) option(NANOVDB_USE_CUDA "Build with CUDA support" OFF) @@ -71,7 +71,8 @@ if(UNIX) find_package(Threads REQUIRED) endif() -if(NANOVDB_BUILD_UNITTESTS OR NANOVDB_BUILD_BENCHMARK) +#if(NANOVDB_BUILD_UNITTESTS OR NANOVDB_BUILD_BENCHMARK) +if(NANOVDB_BUILD_UNITTESTS) find_package(GTest REQUIRED) endif() diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu deleted file mode 100644 index 6d3544e9f8..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_dense.cu +++ /dev/null @@ -1,94 +0,0 @@ - -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file BenchKernels_dense.cu -/// -/// @author Ken Museth -/// -/// @brief CUDA kernel for a simple ray-tracing benchmark test. 
- -#include "DenseGrid.h" -#include // for CUDA memory management -#include -#include // for nanovdb::Ray -#include // for nanovdb::DDA - -#include "Image.h" -#include "Camera.h" - -// Comment out to disable timing of the CUDA kernel -#define CUDA_TIMING - -// This is called by the device -__global__ void render_kernel(const nanovdb::DenseGrid& grid, - const nanovdb::Camera& camera, - nanovdb::Image& img) -{ - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CoordT = nanovdb::Coord; - using RayT = nanovdb::Ray; - using ColorRGB = nanovdb::Image::ColorRGB; - - const int w = blockIdx.x * blockDim.x + threadIdx.x; - const int h = blockIdx.y * blockDim.y + threadIdx.y; - if (w >= img.width() || h >= img.height()) return; - RayT ray = camera.getRay(img.u(w), img.v(h));// ray in world space - ray = ray.worldToIndexF(grid);// ray in index space - if (ray.clip(grid.indexBBox().expandBy(-1))) {// clip to the index bounding box - nanovdb::DDA dda(ray); - const float v0 = grid.getValue(dda.voxel()); - while( dda.step() ) { - CoordT ijk = dda.voxel(); - const float v1 = grid.getValue(ijk); - if (v0*v1>0) continue; -#if 1// second-order central difference - Vec3T grad(grid.getValue(ijk.offsetBy(1,0,0)) - grid.getValue(ijk.offsetBy(-1,0,0)), - grid.getValue(ijk.offsetBy(0,1,0)) - grid.getValue(ijk.offsetBy(0,-1,0)), - grid.getValue(ijk.offsetBy(0,0,1)) - grid.getValue(ijk.offsetBy(0,0,-1))); -#else// first order single-sided difference - Vec3T grad(-v0); - ijk[0] += 1; - grad[0] += grid.getValue(ijk); - ijk[0] -= 1; - ijk[1] += 1; - grad[1] += grid.getValue(ijk); - ijk[1] -= 1; - ijk[2] += 1; - grad[2] += grid.getValue(ijk); -#endif - grad *= rnorm3df(grad[0], grad[1], grad[2]); - img(w, h) = ColorRGB(abs(grad.dot(ray.dir())), 0, 0); - return; - } - } - const int checkerboard = 1 << 7; - img(w, h) = ((h & checkerboard) ^ (w & checkerboard)) ? 
ColorRGB(1, 1, 1) : ColorRGB(0, 0, 0); -} - -// This is called by the host -extern "C" float launch_kernels(const nanovdb::DenseGridHandle& gridHandle, - nanovdb::ImageHandle& imgHandle, - const nanovdb::Camera* camera, - cudaStream_t stream) -{ - const auto* img = imgHandle.image(); // host image! - auto round = [](int a, int b) { return (a + b - 1) / b; }; - const dim3 threadsPerBlock(8, 8), numBlocks(round(img->width(), threadsPerBlock.x), round(img->height(), threadsPerBlock.y)); - auto* deviceGrid = gridHandle.deviceGrid(); // note this cannot be de-referenced since it points to a memory address on the GPU! - auto* deviceImage = imgHandle.deviceImage(); // note this cannot be de-referenced since it points to a memory address on the GPU! - assert(deviceGrid && deviceImage); - float elapsedTime = 0.0f; -#ifdef CUDA_TIMING - nanovdb::GpuTimer timer; - timer.start(); -#endif - // kernal syntax: <<>> - render_kernel<<>>(*deviceGrid, *camera, *deviceImage); - #ifdef CUDA_TIMING - elapsedTime = timer.elapsed(); -#endif - cudaCheckError(); - return elapsedTime; -} diff --git a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu b/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu deleted file mode 100644 index 5d8aee5d1f..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/BenchKernels_nano.cu +++ /dev/null @@ -1,99 +0,0 @@ - -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file BenchKernels.cu -/// -/// @author Ken Museth -/// -/// @brief CUDA kernel for a simple ray-tracing benchmark test. 
- -#include // for nanovdb::GridHandle -#include // for CUDA memory management -#include // for nanovdb::Ray -#include // for nanovdb::ZeroCrossing -#include - -#include "Image.h" -#include "Camera.h" - -// Comment out to disable timing of the CUDA kernel -#define CUDA_TIMING - -// This is called by the device -template -__global__ void render_kernel(const nanovdb::NanoGrid& grid, - const nanovdb::Camera& camera, - nanovdb::Image& img) -{ - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CoordT = nanovdb::Coord; - using RayT = nanovdb::Ray; - using ColorRGB = nanovdb::Image::ColorRGB; - - const int w = blockIdx.x * blockDim.x + threadIdx.x; - const int h = blockIdx.y * blockDim.y + threadIdx.y; - if (w >= img.width() || h >= img.height()) return; - - const auto& tree = grid.tree(); - const auto& bbox = tree.bbox(); - RayT ray = camera.getRay(img.u(w), img.v(h)); - ray = ray.worldToIndexF(grid); - - auto acc = tree.getAccessor(); - CoordT ijk; - float t; - float v0; - - if (nanovdb::ZeroCrossing(ray, acc, ijk, v0, t)) { -#if 1// second-order central difference - Vec3T grad(acc.getValue(ijk.offsetBy(1,0,0)) - acc.getValue(ijk.offsetBy(-1,0,0)), - acc.getValue(ijk.offsetBy(0,1,0)) - acc.getValue(ijk.offsetBy(0,-1,0)), - acc.getValue(ijk.offsetBy(0,0,1)) - acc.getValue(ijk.offsetBy(0,0,-1))); -#else// first order single-sided difference - Vec3T grad(-v0); - ijk[0] += 1; - grad[0] += acc.getValue(ijk); - ijk[0] -= 1; - ijk[1] += 1; - grad[1] += acc.getValue(ijk); - ijk[1] -= 1; - ijk[2] += 1; - grad[2] += acc.getValue(ijk); -#endif - grad *= rnorm3df(grad[0], grad[1], grad[2]); - img(w, h) = ColorRGB(abs(grad.dot(ray.dir())), 0, 0); - } else { - const int checkerboard = 1 << 7; - img(w, h) = ((h & checkerboard) ^ (w & checkerboard)) ? 
ColorRGB(1, 1, 1) : ColorRGB(0, 0, 0); - } -} - -// This is called by the host -extern "C" float launch_kernels(const nanovdb::GridHandle& gridHandle, - nanovdb::ImageHandle& imgHandle, - const nanovdb::Camera* camera, - cudaStream_t stream) -{ - using BuildT = float;// nanovdb::FpN; - const auto* img = imgHandle.image(); // host image! - auto round = [](int a, int b) { return (a + b - 1) / b; }; - const dim3 threadsPerBlock(8, 8), numBlocks(round(img->width(), threadsPerBlock.x), round(img->height(), threadsPerBlock.y)); - auto* deviceGrid = gridHandle.deviceGrid(); // note this cannot be de-referenced since it points to a memory address on the GPU! - auto* deviceImage = imgHandle.deviceImage(); // note this cannot be de-referenced since it points to a memory address on the GPU! - if (!deviceGrid) throw std::runtime_error(std::string("\nError in launch_kernels: No device grid of type: ") + nanovdb::toStr(nanovdb::mapToGridType())); - if (!deviceImage) throw std::runtime_error("\nError in launch_kernels: No device image!"); - float elapsedTime = 0.0f; -#ifdef CUDA_TIMING - nanovdb::GpuTimer timer; - timer.start(); -#endif - // kernal syntax: <<>> - render_kernel<<>>(*deviceGrid, *camera, *deviceImage); -#ifdef CUDA_TIMING - elapsedTime = timer.elapsed(); -#endif - cudaCheckError(); - return elapsedTime; -} diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu b/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu deleted file mode 100644 index d7c0d44e2b..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark_dense.cu +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file Benchmark_nano.cpp -/// -/// @author Ken Museth -/// -/// @brief A super lightweight and portable ray-tracing benchmark -/// that only depends on NanoVDB (not OpenVDB) and CUDA. 
- -#ifdef _WIN32 -#define _USE_MATH_DEFINES -#endif - -#include -#include -#include "Image.h" -#include "Camera.h" -#include "DenseGrid.h" -#include - -#include // for std::setfill and std::setw - -extern "C" float launch_kernels(const nanovdb::DenseGridHandle&, - nanovdb::ImageHandle&, - const nanovdb::Camera*, - cudaStream_t stream); - -int main(int argc, char** argv) -{ - using BufferT = nanovdb::CudaDeviceBuffer; - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CameraT = nanovdb::Camera; - nanovdb::CpuTimer timer; - - if (argc!=2) { - std::cerr << "Usage: " << argv[0] << " path/level_set.vol" << std::endl; - //std::cerr << "To generate an input file: nanovdb_convert dragon.vdb dragon.nvdb\n"; - return 1; - } - - // The first CUDA run time call initializes the CUDA sub-system (loads the runtime API) which takes time! - int deviceCount; - cudaGetDeviceCount(&deviceCount); - for (int device = 0; device < deviceCount; ++device) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, device); - printf("Device %d has compute capability %d.%d.\n", - device, - deviceProp.major, - deviceProp.minor); - } - cudaSetDevice(0); - - cudaStream_t stream; - cudaCheck(cudaStreamCreate(&stream)); - - auto handle = nanovdb::io::readDense(argv[1]); - - const auto* grid = handle.grid(); - if (!grid || !grid->isLevelSet()) { - std::cerr << "Error loading NanoVDB level set from file" << std::endl; - exit (EXIT_FAILURE); - } - handle.deviceUpload(stream, false); - std::cout << "\nRay-tracing DenseGrid of size " - << (grid->gridSize() >> 20) << " MB" << std::endl; - - const int width = 1280, height = 720; - const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; - const auto bbox = grid->worldBBox(); - const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); - auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * M_PI / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - CameraT 
*host_camera, *dev_camera; - cudaCheck(cudaMalloc((void**)&dev_camera, sizeof(CameraT))); // un-managed memory on the device - cudaCheck(cudaMallocHost((void**)&host_camera, sizeof(CameraT))); - - nanovdb::ImageHandle imgHandle(width, height); - auto* img = imgHandle.image(); - imgHandle.deviceUpload(stream, false); - - float elapsedTime = 0.0f; - const int maxAngle = 360; - for (int angle = 0; angle < maxAngle; ++angle) { - host_camera->update(eye(angle), lookat, up, vfov, aspect); - cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); - elapsedTime += launch_kernels(handle, imgHandle, dev_camera, stream); - - //timer.start("Write image to file"); - imgHandle.deviceDownload(stream); - std::stringstream ss; - ss << "./dense_gpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); - //timer.stop(); - - } //frame number angle - - cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFreeHost(host_camera)); - cudaCheck(cudaFree(dev_camera)); - - printf("\nRay-traced %i different frames, each with %i rays, in %5.3f ms.\nThis corresponds to an average of %5.3f ms per frame or %5.3f FPS!\n", - maxAngle, imgHandle.image()->size(), elapsedTime, elapsedTime/maxAngle, 1000.0f*maxAngle/elapsedTime); - - return 0; -} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu b/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu deleted file mode 100644 index 3c17538dbf..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/Benchmark_nano.cu +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file Benchmark_nano.cpp -/// -/// @author Ken Museth -/// -/// @brief A super lightweight and portable ray-tracing benchmark -/// that only depends on NanoVDB (not OpenVDB) and CUDA. 
- -#ifdef _WIN32 -#define _USE_MATH_DEFINES -#endif - -#include -#include -#include "Image.h" -#include "Camera.h" -#include - -#include // for std::setfill and std::setw - -extern "C" float launch_kernels(const nanovdb::GridHandle&, - nanovdb::ImageHandle&, - const nanovdb::Camera*, - cudaStream_t stream); - -int main(int argc, char** argv) -{ - using BufferT = nanovdb::CudaDeviceBuffer; - using ValueT = float; - using BuildT = float;//nanovdb::FpN; - using Vec3T = nanovdb::Vec3; - using CameraT = nanovdb::Camera; - nanovdb::CpuTimer timer; - - if (argc!=2) { - std::cerr << "Usage: " << argv[0] << " path/level_set.nvdb" << std::endl; - std::cerr << "To generate an input file: nanovdb_convert dragon.vdb dragon.nvdb\n"; - return 1; - } - - // The first CUDA run time call initializes the CUDA sub-system (loads the runtime API) which takes time! - int deviceCount; - cudaGetDeviceCount(&deviceCount); - for (int device = 0; device < deviceCount; ++device) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, device); - printf("Device %d has compute capability %d.%d.\n", - device, - deviceProp.major, - deviceProp.minor); - } - cudaSetDevice(0); - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - printf("CUDA driver version:\t%i.%i\n", driverVersion/1000, (driverVersion%1000)/10); - printf("CUDA runtime version:\t%i.%i\n", runtimeVersion/1000, (runtimeVersion%1000)/10); - - cudaStream_t stream; - cudaCheck(cudaStreamCreate(&stream)); - - const int gridID = 0, verbose = 1; - auto handle = nanovdb::io::readGrid(argv[1], gridID, verbose); - - const auto* grid = handle.grid(gridID); - if (!grid) { - std::cerr << "Error loading \"" << nanovdb::toStr(nanovdb::mapToGridType()) << "\" grid from file " << argv[1] << std::endl; - exit (EXIT_FAILURE); - if (!grid->isLevelSet()) { - std::cerr << "Grid is not a level set\n"; - exit (EXIT_FAILURE); - } - } - handle.deviceUpload(stream, false); - 
std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\" of size " - << (grid->gridSize() >> 20) << " MB" << std::endl; - - const int width = 1280, height = 720; - const ValueT vfov = 25.0f, aspect = ValueT(width) / height, radius = 300.0f; - const auto bbox = grid->worldBBox(); - const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); - auto eye = [&lookat, &radius](int angle) { - const ValueT theta = angle * M_PI / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - CameraT *host_camera, *dev_camera; - cudaCheck(cudaMalloc((void**)&dev_camera, sizeof(CameraT))); // un-managed memory on the device - cudaCheck(cudaMallocHost((void**)&host_camera, sizeof(CameraT))); - - nanovdb::ImageHandle imgHandle(width, height); - auto* img = imgHandle.image(); - imgHandle.deviceUpload(stream, false); - - float elapsedTime = 0.0f; - const int maxAngle = 360; - for (int angle = 0; angle < maxAngle; ++angle) { - host_camera->update(eye(angle), lookat, up, vfov, aspect); - cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); - elapsedTime += launch_kernels(handle, imgHandle, dev_camera, stream); - - //timer.start("Write image to file"); - imgHandle.deviceDownload(stream); -#if 1 - std::stringstream ss; - ss << "./nanovdb_gpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); -#endif - //timer.stop(); - - } //frame number angle - - cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFreeHost(host_camera)); - cudaCheck(cudaFree(dev_camera)); - - printf("\nRay-traced %i different frames, each with %i rays, in %5.3f ms.\nThis corresponds to an average of %5.3f ms per frame or %5.3f FPS!\n", - maxAngle, imgHandle.image()->size(), elapsedTime, elapsedTime/maxAngle, 1000.0f*maxAngle/elapsedTime); - - return 0; -} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt 
b/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt deleted file mode 100644 index 967b5299ee..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright Contributors to the OpenVDB Project -# SPDX-License-Identifier: MPL-2.0 -# -#[=======================================================================[ - - CMake Configuration for NanoVDB Benchmark - -#]=======================================================================] - -cmake_minimum_required(VERSION 3.18) -project(NanoVDBBenchmark LANGUAGES CXX) - -include(GNUInstallDirs) - -############################################################################### - -message(STATUS "----------------------------------------------------") -message(STATUS "---------- Configuring NanoVDB Benchmark -----------") -message(STATUS "----------------------------------------------------") - -############################################################################### - -if(WIN32 AND NANOVDB_CUDA_KEEP_PTX) - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/x64/Release") -endif() - -# ----------------------------------------------------------------------- -# TODO: Benchmark should probably not require gtest. -if(NOT TARGET GTest::GTest) - message(WARNING " - GTest required to build benchmark. Skipping.") - return() -endif() - -# ----------------------------------------------------------------------- -# many of the sample projects depend on a data directory. This allows Debug -# launching from the cmake binary working directory. 
-file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/data") - -set(BENCHMARK_SOURCES TestBenchmark.cc Camera.h Image.h) - -if(NANOVDB_USE_CUDA) - list(APPEND BENCHMARK_SOURCES TestBenchmark.cu BenchKernels_nano.cu) - - add_executable(benchmark_nano Benchmark_nano.cu BenchKernels_nano.cu) - add_executable(benchmark_dense Benchmark_dense.cu BenchKernels_dense.cu) - - target_link_libraries(benchmark_nano PRIVATE nanovdb) - target_link_libraries(benchmark_dense PRIVATE nanovdb) - - # Propagate MSVC Runtime from the OpenVDB library (if VDB_MSVC_RUNTIME_SELECTION - # has been set then we're building and linking against OpenVDB, see the root - # NanoVDB CMakeLists.txt) - if(VDB_MSVC_RUNTIME_SELECTION) - set_target_properties(benchmark_nano PROPERTIES - MSVC_RUNTIME_LIBRARY ${VDB_MSVC_RUNTIME_SELECTION}) - set_target_properties(benchmark_dense PROPERTIES - MSVC_RUNTIME_LIBRARY ${VDB_MSVC_RUNTIME_SELECTION}) - endif() -endif() - -# ----------------------------------------------------------------------- - -add_executable(benchmark ${BENCHMARK_SOURCES}) -target_link_libraries(benchmark PRIVATE nanovdb GTest::GTest GTest::Main) - -# Propagate MSVC Runtime from the OpenVDB library (if VDB_MSVC_RUNTIME_SELECTION -# has been set then we're building and linking against OpenVDB, see the root -# NanoVDB CMakeLists.txt) -if(VDB_MSVC_RUNTIME_SELECTION) - set_target_properties(benchmark PROPERTIES - MSVC_RUNTIME_LIBRARY ${VDB_MSVC_RUNTIME_SELECTION}) -endif() - -install(TARGETS benchmark DESTINATION ${CMAKE_INSTALL_DOCDIR}/examples) diff --git a/nanovdb/nanovdb/examples/benchmark/Camera.h b/nanovdb/nanovdb/examples/benchmark/Camera.h deleted file mode 100644 index 88e4580562..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/Camera.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file Camera.h -/// -/// @author Ken Museth -/// -/// @brief A simple camera class. 
- -#ifndef NANOVDB_CAMERA_H_HAS_BEEN_INCLUDED -#define NANOVDB_CAMERA_H_HAS_BEEN_INCLUDED - -#include // for Vec3 -#include - -namespace nanovdb { - -/// @brief A minimal perspective camera for ray generation -template, typename RayT = Ray> -class Camera -{ - Vec3T mEye, mW, mU, mV; - - __hostdev__ void init(RealT vfov, RealT aspect) - { - const RealT halfHeight = RealT(tan(vfov * 3.14159265358979323846 / 360)); - const RealT halfWidth = aspect * halfHeight; - mW = halfWidth * mU + halfHeight * mV + mW; // remove eye here and in getRay - mU *= 2 * halfWidth; - mV *= 2 * halfHeight; - } - -public: - /// @brief default Ctor. - Camera() = default; - - /// @brief Ctor. // vfov is top to bottom in degrees - /// @note up is assumed to be a unit-vector - __hostdev__ Camera(const Vec3T& eye, const Vec3T& lookat, const Vec3T& up, RealT vfov, RealT aspect) - : mEye(eye) - , mW((eye - lookat).normalize()) - , mU(up.cross(mW)) - , mV(up) - { - this->init(vfov, aspect); - } - __hostdev__ void update(const Vec3T& eye, const Vec3T& lookat, const Vec3T& up, RealT vfov, RealT aspect) - { - mEye = eye; - mV = up; - mW = mEye - lookat; - mW.normalize(); - mU = mV.cross(mW); - this->init(vfov, aspect); - } - /// @brief {u,v} are are assumed to be [0,1] - __hostdev__ RayT getRay(RealT u, RealT v) const { - auto dir = u * mU + v * mV - mW; - dir.normalize(); - return RayT(mEye, dir); - } - - __hostdev__ const Vec3T& P() const { return mEye; } - __hostdev__ const Vec3T& U() const { return mU; } - __hostdev__ const Vec3T& V() const { return mV; } - __hostdev__ const Vec3T& W() const { return mW; } - -}; // Camera - -} // namespace nanovdb - -#endif // NANOVDB_CAMERA_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h b/nanovdb/nanovdb/examples/benchmark/DenseGrid.h deleted file mode 100644 index de188636be..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/DenseGrid.h +++ /dev/null @@ -1,490 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// 
SPDX-License-Identifier: MPL-2.0 - -/// @file DenseGrid.h -/// -/// @author Ken Museth -/// -/// @brief Simple dense grid class. - -#ifndef NANOVDB_DENSEGRID_H_HAS_BEEN_INCLUDED -#define NANOVDB_DENSEGRID_H_HAS_BEEN_INCLUDED - -#include // for uint64_t -#include // for std::ifstream -#include // for default Buffer -#include -#include // for Map, GridClass, GridType and and Coord - - -// use 4x4x4 tiles for better cache coherence -// else it uses dense indexing which is slow! -// 0 means disable, 1 is 2x2x2, 2 is 4x4x4 and 3 is 8x8x8 -#define LOG2_TILE_SIZE 2 - -namespace nanovdb { - -// forward decleration -template -class DenseGridHandle; - -#define DENSE_MAGIC_NUMBER 0x42445665736e6544UL // "DenseVDB" in hex - little endian (uint64_t) - - -struct DenseData -{ - uint64_t mMagic;// magic number - uint64_t mSize; - Map mMap;// defined in NanoVDB.h - CoordBBox mIndexBBox;// min/max of bbox - BBox mWorldBBox;// 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) - Vec3d mVoxelSize; - GridClass mGridClass;// defined in NanoVDB.h - GridType mGridType; // defined in NanoVDB.h - uint64_t mY, mX;//strides in the y and x direction - - __hostdev__ Coord dim() const { return mIndexBBox.dim(); } - - // Affine transformations based on double precision - template - __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world - template - __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { return mMap.applyInverseMap(xyz); } // Pos: world -> index - template - __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return mMap.applyJacobian(xyz); } // Dir: index -> world - template - __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index - template - __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return mMap.applyIJT(xyz); } - // Affine transformations based on single precision - template - __hostdev__ Vec3T applyMapF(const 
Vec3T& xyz) const { return mMap.applyMapF(xyz); } // Pos: index -> world - template - __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { return mMap.applyInverseMapF(xyz); } // Pos: world -> index - template - __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return mMap.applyJacobianF(xyz); } // Dir: index -> world - template - __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index - template - __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); } -}; -/// @brief Simple dense grid class -/// @note ZYX is the memory-layout in VDB. It leads to nested -/// for-loops of the order x, y, z. -template -class DenseGrid : private DenseData -{ -#if LOG2_TILE_SIZE > 0 - static constexpr uint32_t TileLog2 = LOG2_TILE_SIZE, TileMask = (1 << TileLog2) - 1, TileDim = 1 << (3*TileLog2); -#endif - using DenseData = DenseData; - -public: - using ValueType = ValueT; - - template - inline static DenseGridHandle create(Coord min, // min inclusive index coordinate - Coord max, // max inclusive index coordinate - double dx = 1.0, //voxel size - const Vec3d& p0 = Vec3d(0.0), // origin - GridClass gridClass = GridClass::Unknown, - const BufferT& allocator = BufferT()); - - __hostdev__ DenseGrid(const DenseGrid&) = delete; - __hostdev__ ~DenseGrid() = delete; - __hostdev__ DenseGrid& operator=(const DenseGrid&) = delete; - - __hostdev__ uint64_t size() const { return mIndexBBox.volume(); } - __hostdev__ inline uint64_t coordToOffset(const Coord &ijk) const; - __hostdev__ inline bool test(const Coord &ijk) const; - __hostdev__ uint64_t memUsage() const {return mSize;} - __hostdev__ uint64_t gridSize() const {return this->memUsage();} - __hostdev__ const Coord& min() const { return mIndexBBox[0]; } - __hostdev__ const Coord& max() const { return mIndexBBox[1]; } - __hostdev__ inline bool isValidType() const; - - /// @brief Return a const reference to the Map for this 
grid - __hostdev__ const Map& map() const { return DenseData::mMap; } - - // @brief Return a const reference to the size of a voxel in world units - __hostdev__ const Vec3d& voxelSize() const { return DenseData::mVoxelSize; } - - /// @brief world to index space transformation - template - __hostdev__ Vec3T worldToIndex(const Vec3T& xyz) const { return this->applyInverseMap(xyz); } - - /// @brief world to index space transformation - template - __hostdev__ Vec3T indexToWorld(const Vec3T& xyz) const { return this->applyMap(xyz); } - - /// @brief transformation from index space direction to world space direction - /// @warning assumes dir to be normalized - template - __hostdev__ Vec3T indexToWorldDir(const Vec3T& dir) const { return this->applyJacobian(dir); } - - /// @brief transformation from world space direction to index space direction - /// @warning assumes dir to be normalized - template - __hostdev__ Vec3T worldToIndexDir(const Vec3T& dir) const { return this->applyInverseJacobian(dir); } - - /// @brief transform the gradient from index space to world space. - /// @details Applies the inverse jacobian transform map. 
- template - __hostdev__ Vec3T indexToWorldGrad(const Vec3T& grad) const { return this->applyIJT(grad); } - - /// @brief world to index space transformation - template - __hostdev__ Vec3T worldToIndexF(const Vec3T& xyz) const { return this->applyInverseMapF(xyz); } - - /// @brief index to world space transformation - template - __hostdev__ Vec3T indexToWorldF(const Vec3T& xyz) const { return this->applyMapF(xyz); } - - /// @brief transformation from index space direction to world space direction - /// @warning assumes dir to be normalized - template - __hostdev__ Vec3T indexToWorldDirF(const Vec3T& dir) const { return this->applyJacobianF(dir); } - - /// @brief transformation from world space direction to index space direction - /// @warning assumes dir to be normalized - template - __hostdev__ Vec3T worldToIndexDirF(const Vec3T& dir) const { return this->applyInverseJacobianF(dir); } - - /// @brief Transforms the gradient from index space to world space. - /// @details Applies the inverse jacobian transform map. - template - __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DenseData::applyIJTF(grad); } - - /// @brief Computes a AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return DenseData::mWorldBBox; } - - __hostdev__ bool isLevelSet() const { return DenseData::mGridClass == GridClass::LevelSet; } - __hostdev__ bool isFogVolume() const { return DenseData::mGridClass == GridClass::FogVolume; } - - /// @brief Computes a AABB of active values in index space - /// - /// @note This method is returning a floating point bounding box and not a CoordBBox. This makes - /// it more useful for clipping rays. 
- __hostdev__ const CoordBBox& indexBBox() const { return mIndexBBox; } - - __hostdev__ const GridType& gridType() const { return DenseData::mGridType; } - __hostdev__ const GridClass& gridClass() const { return DenseData::mGridClass; } - - __hostdev__ DenseData* data() { return reinterpret_cast(this); } - __hostdev__ const DenseData* data() const { return reinterpret_cast(this); } - - __hostdev__ ValueT* values() { return reinterpret_cast(this+1);} - __hostdev__ const ValueT* values() const { return reinterpret_cast(this+1); } - - __hostdev__ inline const ValueT& getValue(const Coord &ijk) const; - __hostdev__ inline void setValue(const Coord &ijk, const ValueT &v); -}; // Grid - -template -template -DenseGridHandle -DenseGrid::create(Coord min, - Coord max, - double dx, //voxel size - const Vec3d& p0, // origin - GridClass gridClass, - const BufferT& allocator) -{ - if (dx <= 0) throw std::runtime_error("GridBuilder: voxel size is zero or negative"); - max += Coord(1,1,1);// now max is exclusive - -#if LOG2_TILE_SIZE > 0 - const uint64_t dim[3] = {(uint64_t(max[0] - min[0]) + TileMask) >> TileLog2, - (uint64_t(max[1] - min[1]) + TileMask) >> TileLog2, - (uint64_t(max[2] - min[2]) + TileMask) >> TileLog2}; - const uint64_t size = sizeof(DenseGrid) + sizeof(ValueT)*TileDim*dim[0]*dim[1]*dim[2]; -#else - const uint64_t dim[3] = {uint64_t(max[0] - min[0]), - uint64_t(max[1] - min[1]), - uint64_t(max[2] - min[2])}; - const uint64_t size = sizeof(DenseGrid) + sizeof(ValueT)*dim[0]*dim[1]*dim[2]; -#endif - - auto buffer = allocator.create(size); - DenseGrid* grid = reinterpret_cast(buffer.data()); - std::memset(grid, 0, size);// initiate all dense grid values to zero - grid->mMagic = DENSE_MAGIC_NUMBER; - grid->mSize = size; - const double Tx = p0[0], Ty = p0[1], Tz = p0[2]; - const double mat[4][4] = { - {dx, 0.0, 0.0, 0.0}, // row 0 - {0.0, dx, 0.0, 0.0}, // row 1 - {0.0, 0.0, dx, 0.0}, // row 2 - {Tx, Ty, Tz, 1.0}, // row 3 - }; - const double invMat[4][4] = { - {1 / 
dx, 0.0, 0.0, 0.0}, // row 0 - {0.0, 1 / dx, 0.0, 0.0}, // row 1 - {0.0, 0.0, 1 / dx, 0.0}, // row 2 - {-Tx, -Ty, -Tz, 1.0}, // row 3 - }; - - grid->mMap.set(mat, invMat, 1.0); - for (int i=0; i<3; ++i) { - grid->mIndexBBox[0][i] = min[i]; - grid->mIndexBBox[1][i] = max[i] - 1; - } -#if 1 - grid->mWorldBBox = grid->mIndexBBox.transform(grid->mMap); -#else - grid->mWorldBBox[0] = grid->mWorldBBox[1] = grid->mMap.applyMap(Vec3d(min[0], min[1], min[2])); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], min[1], max[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], max[1], min[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], min[1], min[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], max[1], min[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], min[1], max[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(min[0], max[1], max[2]))); - grid->mWorldBBox.expand(grid->mMap.applyMap(Vec3d(max[0], max[1], max[2]))); -#endif - grid->mVoxelSize = grid->mMap.applyMap(Vec3d(1)) - grid->mMap.applyMap(Vec3d(0)); - if (gridClass == GridClass::LevelSet && !is_floating_point::value) - throw std::runtime_error("Level sets are expected to be floating point types"); - if (gridClass == GridClass::FogVolume && !is_floating_point::value) - throw std::runtime_error("Fog volumes are expected to be floating point types"); - grid->mGridClass = gridClass; - grid->mGridType = mapToGridType(); - grid->mY = dim[2]; - grid->mX = dim[2] * dim[1]; - return DenseGridHandle(std::move(buffer)); -} - -template -bool DenseGrid::test(const Coord &ijk) const -{ - return (ijk[0]>=mIndexBBox[0][0]) && (ijk[0]<=mIndexBBox[1][0]) && - (ijk[1]>=mIndexBBox[0][1]) && (ijk[1]<=mIndexBBox[1][1]) && - (ijk[2]>=mIndexBBox[0][2]) && (ijk[2]<=mIndexBBox[1][2]); -} - -template -uint64_t DenseGrid::coordToOffset(const Coord &ijk) const -{ - assert(this->test(ijk)); -#if LOG2_TILE_SIZE > 0 - const uint32_t x = ijk[0] - 
mIndexBBox[0][0]; - const uint32_t y = ijk[1] - mIndexBBox[0][1]; - const uint32_t z = ijk[2] - mIndexBBox[0][2]; - return ((mX*(x>>TileLog2) + mY*(y>>TileLog2) + (z>>TileLog2))<<(3*TileLog2)) + - ((x&TileMask)<<(2*TileLog2)) + ((y&TileMask)< -const ValueT& DenseGrid::getValue(const Coord &ijk) const -{ - return this->values()[this->coordToOffset(ijk)]; -} - -template -void DenseGrid::setValue(const Coord &ijk, const ValueT &value) -{ - this->values()[this->coordToOffset(ijk)] = value; -} - -template -bool DenseGrid::isValidType() const -{ - return std::is_same::value ? mGridType == GridType::Float : false; -} - -///////////////////////////////////////////// - -namespace io{ - -template -void writeDense(const DenseGrid &grid, const char* fileName) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) throw std::runtime_error("Unable to open file for output"); - os.write(reinterpret_cast(&grid), grid.memUsage()); -} - -template -void writeDense(const DenseGridHandle &handle, const char* fileName) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) throw std::runtime_error("Unable to open file for output"); - os.write(reinterpret_cast(handle.data()), handle.size()); -} - -template -DenseGridHandle -readDense(const char* fileName, const BufferT& allocator = BufferT()) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open file for input"); - } - uint64_t tmp[2]; - is.read(reinterpret_cast(tmp), 2*sizeof(uint64_t)); - if (tmp[0] != DENSE_MAGIC_NUMBER) throw std::runtime_error("This is not a dense NanoVDB file!"); - auto buffer = allocator.create(tmp[1]); - is.seekg(0);// rewind - is.read(reinterpret_cast(buffer.data()), tmp[1]); - return DenseGridHandle(std::move(buffer)); -} -}// namespace io -///////////////////////////////////////////// - -/// @brief Converts a NanoVDB grid to a 
DenseGrid -template -DenseGridHandle convertToDense(const GridT &grid, const BufferT& allocator = BufferT()) -{ - using ValueT = typename GridT::ValueType; - using DenseT = DenseGrid; - const Coord min = grid.indexBBox().min(), max = grid.indexBBox().max() + Coord(1,1,1);// max is exclusive! -#if LOG2_TILE_SIZE > 0 - static constexpr uint32_t TileLog2 = LOG2_TILE_SIZE, TileMask = (1 << TileLog2) - 1, TileDim = 1 << (3*TileLog2); - const uint64_t dim[3] = {(uint64_t(max[0] - min[0]) + TileMask) >> TileLog2, - (uint64_t(max[1] - min[1]) + TileMask) >> TileLog2, - (uint64_t(max[2] - min[2]) + TileMask) >> TileLog2}; - const uint64_t size = sizeof(DenseT) + sizeof(ValueT)*TileDim*dim[0]*dim[1]*dim[2]; -#else - const uint64_t dim[3] = {uint64_t(max[0] - min[0]), - uint64_t(max[1] - min[1]), - uint64_t(max[2] - min[2])}; - const uint64_t size = sizeof(DenseT) + sizeof(ValueT)*dim[0]*dim[1]*dim[2]; -#endif - - auto buffer = allocator.create(size); - auto *dense = reinterpret_cast(buffer.data()); - auto *data = dense->data(); - std::memset(data, 0, size);// zero buffer since we're only setting sparse values below - - // copy DenseData - data->mMagic = DENSE_MAGIC_NUMBER; - data->mSize = size; - data->mMap = grid.map(); - data->mIndexBBox = grid.indexBBox(); - data->mWorldBBox = grid.worldBBox(); - data->mVoxelSize = grid.voxelSize(); - data->mGridClass = grid.gridClass(); - data->mGridType = grid.gridType(); - data->mY = dim[2]; - data->mX = dim[2] * dim[1]; - - // copy values - auto kernel = [&](const Range<1,int> &r) { - auto acc = grid.getAccessor(); - Coord ijk; - for (ijk[0] = r.begin(); ijk[0] < r.end(); ++ijk[0]) { - for (ijk[1] = min[1]; ijk[1] < max[1]; ++ijk[1]) { - for (ijk[2] = min[2]; ijk[2] < max[2]; ++ijk[2]) { - dense->setValue(ijk, acc.getValue(ijk)); - } - } - } - }; - Range<1,int> range(min[0], max[0]); -#if 1 - forEach(range, kernel); -#else - kernel(range); -#endif - - return DenseGridHandle( std::move(buffer) ); -} 
-///////////////////////////////////////////// - -template -class DenseGridHandle -{ - BufferT mBuffer; - -public: - DenseGridHandle(BufferT&& resources) { - if (*reinterpret_cast(resources.data()) != DENSE_MAGIC_NUMBER) { - throw std::runtime_error("DenseGridHandle was constructed with an invalid buffer"); - } - mBuffer = std::move(resources); - } - - DenseGridHandle() = default; - /// @brief Disallow copy-construction - DenseGridHandle(const DenseGridHandle&) = delete; - /// @brief Disallow copy assignment operation - DenseGridHandle& operator=(const DenseGridHandle&) = delete; - /// @brief Move copy assignment operation - DenseGridHandle& operator=(DenseGridHandle&& other) noexcept - { - mBuffer = std::move(other.mBuffer); - return *this; - } - /// @brief Move copy-constructor - DenseGridHandle(DenseGridHandle&& other) noexcept { mBuffer = std::move(other.mBuffer); } - /// @brief Default destructor - ~DenseGridHandle() { this->reset(); } - - void reset() { mBuffer.clear(); } - - BufferT& buffer() { return mBuffer; } - const BufferT& buffer() const { return mBuffer; } - - /// @brief Returns a non-const pointer to the data. - /// - /// @warning Note that the return pointer can be NULL if the DenseGridHandle was not initialized - uint8_t* data() {return mBuffer.data();} - - /// @brief Returns a const pointer to the data. - /// - /// @warning Note that the return pointer can be NULL if the DenseGridHandle was not initialized - const uint8_t* data() const {return mBuffer.data();} - - /// @brief Returns the size in bytes of the raw memory buffer managed by this DenseGridHandle's allocator. - uint64_t size() const { return mBuffer.size();} - - /// @brief Returns a const pointer to the NanoVDB grid encoded in the DenseGridHandle. - /// - /// @warning Note that the return pointer can be NULL if the DenseGridHandle was not initialized or the template - /// parameter does not match! 
- template - const DenseGrid* grid() const - { - using GridT = const DenseGrid; - GridT* grid = reinterpret_cast(mBuffer.data()); - return (grid && grid->isValidType()) ? grid : nullptr; - } - - template - DenseGrid* grid() - { - using GridT = DenseGrid; - GridT* grid = reinterpret_cast(mBuffer.data()); - return (grid && grid->isValidType()) ? grid : nullptr; - } - - template - typename std::enable_if::hasDeviceDual, const DenseGrid*>::type - deviceGrid() const - { - using GridT = const DenseGrid; - bool isValidType = reinterpret_cast(mBuffer.data())->isValidType(); - GridT* grid = reinterpret_cast(mBuffer.deviceData()); - return (grid && isValidType) ? grid : nullptr; - } - - template - typename std::enable_if::hasDeviceDual, void>::type - deviceUpload(void* stream = nullptr, bool sync = true) { - mBuffer.deviceUpload(stream, sync); - } - - template - typename std::enable_if::hasDeviceDual, void>::type - deviceDownload(void* stream = nullptr, bool sync = true) { - mBuffer.deviceDownload(stream, sync); - } -}; // DenseGridHandle - -} // namespace nanovdb - -#endif // NANOVDB_DENSEGRID_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/examples/benchmark/Image.h b/nanovdb/nanovdb/examples/benchmark/Image.h deleted file mode 100644 index c3686769eb..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/Image.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/*! - \file Image.h - - \author Ken Museth - - \date January 8, 2020 - - \brief A simple image class that uses pinned memory for fast GPU transfer - - \warning This class is only included to support benchmark-tests. 
-*/ - -#ifndef NANOVDB_IMAGE_H_HAS_BEEN_INCLUDED -#define NANOVDB_IMAGE_H_HAS_BEEN_INCLUDED - -#include // for uint8_t -#include // for std::string -#include // for std::ofstream -#include - -#include - -//#if defined(NANOVDB_USE_TBB) -#if defined(NANOVDB_USE_TBB) && !defined(__CUDACC_RTC__) -#include -#include -#endif - -namespace nanovdb { - -struct ImageData -{ - int mWidth, mHeight, mSize; - float mScale[2]; - ImageData(int w, int h) - : mWidth(w) - , mHeight(h) - , mSize(w * h) - , mScale{1.0f / w, 1.0f / h} - { - } -}; - -/// @note Can only be constructed by an ImageHandle -class Image : private ImageData -{ - using DataT = ImageData; - -public: - struct ColorRGB - { - uint8_t r, g, b; - __hostdev__ ColorRGB(float _r, float _g, float _b) - : r(uint8_t(_r * 255.0f)) - , g(uint8_t(_g * 255.0f)) - , b(uint8_t(_b * 255.0f)) - { - } - }; - void clear(int log2 = 7); - __hostdev__ int width() const { return DataT::mWidth; } - __hostdev__ int height() const { return DataT::mHeight; } - __hostdev__ int size() const { return DataT::mSize; } - __hostdev__ float u(int w) const { return w * mScale[0]; } - __hostdev__ float v(int h) const { return h * mScale[1]; } - __hostdev__ inline ColorRGB& operator()(int w, int h); - void writePPM(const std::string& fileName, const std::string& comment = "width height 255"); -}; // Image - -template -class ImageHandle -{ - BufferT mBuffer; - -public: - ImageHandle(int width, int height, int log2 = 7); - - const Image* image() const { return reinterpret_cast(mBuffer.data()); } - - Image* image() { return reinterpret_cast(mBuffer.data()); } - - template - typename std::enable_if::hasDeviceDual, const Image*>::type - deviceImage() const { return reinterpret_cast(mBuffer.deviceData()); } - - template - typename std::enable_if::hasDeviceDual, Image*>::type - deviceImage() { return reinterpret_cast(mBuffer.deviceData()); } - - template - typename std::enable_if::hasDeviceDual, void>::type - deviceUpload(void* stream = nullptr, bool sync = 
true) { mBuffer.deviceUpload(stream, sync); } - - template - typename std::enable_if::hasDeviceDual, void>::type - deviceDownload(void* stream = nullptr, bool sync = true) { mBuffer.deviceDownload(stream, sync); } -}; - -template -ImageHandle::ImageHandle(int width, int height, int log2) - : mBuffer(sizeof(ImageData) + width * height * sizeof(Image::ColorRGB)) -{ - ImageData data(width, height); - *reinterpret_cast(mBuffer.data()) = data; - this->image()->clear(log2); // clear pixels or set background -} - -inline void Image::clear(int log2) -{ - ColorRGB* ptr = &(*this)(0, 0); - if (log2 < 0) { - for (auto* end = ptr + ImageData::mSize; ptr != end;) - *ptr++ = ColorRGB(0, 0, 0); - } else { - const int checkerboard = 1 << log2; - - auto kernel2D = [&](int x0, int y0, int x1, int y1) { - for (int h = y0; h != y1; ++h) { - const int n = h & checkerboard; - ColorRGB* p = ptr + h * ImageData::mWidth; - for (int w = x0; w != x1; ++w) { - *(p + w) = (n ^ (w & checkerboard)) ? ColorRGB(1, 1, 1) : ColorRGB(0, 0, 0); - } - } - }; - -//#if defined(NANOVDB_USE_TBB) -#if defined(NANOVDB_USE_TBB) && !defined(__CUDACC_RTC__) - tbb::blocked_range2d range(0, ImageData::mWidth, 0, ImageData::mHeight); - tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { - kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); - }); -#else - kernel2D(0, 0, ImageData::mWidth, ImageData::mHeight); -#endif - } -} - -inline Image::ColorRGB& Image::operator()(int w, int h) -{ - assert(w < ImageData::mWidth); - assert(h < ImageData::mHeight); - return *(reinterpret_cast((uint8_t*)this + sizeof(ImageData)) + w + h * ImageData::mWidth); -} - -inline void Image::writePPM(const std::string& fileName, const std::string& comment) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary); - if (os.fail()) - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for output"); - os << "P6\n#" << comment << "\n" - << this->width() << " " << this->height() 
<< "\n255\n"; - os.write((const char*)&(*this)(0, 0), this->size() * sizeof(ColorRGB)); -} - -} // namespace nanovdb - -#endif // end of NANOVDB_IMAGE_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc deleted file mode 100644 index df3d5b5daf..0000000000 --- a/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cc +++ /dev/null @@ -1,579 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file TestBenchmark.cc -/// -/// @author Ken Museth -/// -/// @brief A simple ray-tracing benchmark test. - -#include -#include -#include -#include -#include -#include "Image.h" -#include "Camera.h" -#include - -#include "DenseGrid.h" - -#if defined(NANOVDB_USE_OPENVDB) -#include -#include -#include -#include -#include -#include -#include -#endif - -#if defined(NANOVDB_USE_TBB) -#include -#include -#endif - -#include - -// define the environment variable VDB_DATA_PATH to use models from the web -// e.g. setenv VDB_DATA_PATH /home/kmu/dev/data/vdb -// or export VDB_DATA_PATH=/Users/ken/dev/data/vdb - -// define the environment variable VDB_SCRATCH_PATH to specify the directory where image are saved - -// The fixture for testing class. -class Benchmark : public ::testing::Test -{ -protected: - Benchmark() {} - - ~Benchmark() override {} - - // If the constructor and destructor are not enough for setting up - // and cleaning up each test, you can define the following methods: - - void SetUp() override - { - // Code here will be called immediately after the constructor (right - // before each test). - } - - void TearDown() override - { - // Code here will be called immediately after each test (right - // before the destructor). - } - static std::string getEnvVar(const std::string& name, const std::string def = "") - { - const char* str = std::getenv(name.c_str()); - return str == nullptr ? 
def : std::string(str); - } - -#if defined(NANOVDB_USE_OPENVDB) - openvdb::FloatGrid::Ptr getSrcGrid(int verbose = 1) - { - openvdb::FloatGrid::Ptr grid; - const std::string path = this->getEnvVar("VDB_DATA_PATH"); - if (path.empty()) { // create a narrow-band level set sphere - std::cout << "\tSet the environment variable \"VDB_DATA_PATH\" to a directory\n" - << "\tcontaining OpenVDB level sets files. They can be downloaded\n" - << "\there: https://www.openvdb.org/download/" << std::endl; - const float radius = 50.0f; - const openvdb::Vec3f center(0.0f, 0.0f, 0.0f); - const float voxelSize = 0.1f, width = 3.0f; - if (verbose > 0) { - std::stringstream ss; - ss << "Generating level set sphere with a radius of " << radius << " voxels"; - mTimer.start(ss.str()); - } -#if 1 // choose between a sphere or one of five platonic solids - grid = openvdb::tools::createLevelSetSphere(radius, center, voxelSize, width); - grid->setName("ls_sphere"); -#else - const int faces[5] = {4, 6, 8, 12, 20}; - grid = openvdb::tools::createLevelSetPlatonic(faces[4], radius, center, voxelSize, width); - grid->setName("ls_platonic"); -#endif - } else { - openvdb::initialize(); - const std::vector models = {"armadillo.vdb", "buddha.vdb", "bunny.vdb", "crawler.vdb", "dragon.vdb", "iss.vdb", "space.vdb", "torus_knot_helix.vdb", "utahteapot.vdb", "bunny_cloud.vdb", "wdas_cloud.vdb"}; - const std::string fileName = path + "/" + models[4]; // - if (verbose > 0) - mTimer.start("Reading grid from the file \"" + fileName + "\""); - openvdb::io::File file(fileName); - file.open(false); //disable delayed loading - grid = openvdb::gridPtrCast(file.readGrid(file.beginName().gridName())); - } - if (verbose > 0) - mTimer.stop(); - if (verbose > 1) - grid->print(std::cout, 3); - return grid; - } -#endif - nanovdb::CpuTimer mTimer; -}; // Benchmark - -TEST_F(Benchmark, Ray) -{ - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CoordT = nanovdb::Coord; - using CoordBBoxT = nanovdb::BBox; - using 
BBoxT = nanovdb::BBox; - using RayT = nanovdb::Ray; - - {// clip ray against an index bbox - // test bbox clip - const Vec3T dir(-1.0, 2.0, 3.0); - const Vec3T eye(2.0, 1.0, 1.0); - RealT t0 = 0.1, t1 = 12589.0; - RayT ray(eye, dir, t0, t1); - - // intersects the two faces of the box perpendicular to the y-axis! - EXPECT_TRUE(ray.clip(CoordBBoxT(CoordT(0, 2, 2), CoordT(2, 4, 6)))); - //std::cerr << "t0 = " << ray.t0() << ", ray.t1() = " << ray.t1() << std::endl; - //std::cerr << "ray(0.5) = " << ray(0.5) << std::endl; - //std::cerr << "ray(1.5) = " << ray(1.5) << std::endl; - //std::cerr << "ray(2.0) = " << ray(2.0) << std::endl; - EXPECT_EQ(0.5, ray.t0()); - EXPECT_EQ(2.0, ray.t1()); - EXPECT_EQ(ray(0.5)[1], 2); //lower y component of intersection - EXPECT_EQ(ray(2.0)[1], 5); //higher y component of intersection - - ray.reset(eye, dir, t0, t1); - // intersects the lower edge anlong the z-axis of the box - EXPECT_TRUE(ray.clip(BBoxT(Vec3T(1.5, 2.0, 2.0), Vec3T(4.5, 4.0, 6.0)))); - EXPECT_EQ(0.5, ray.t0()); - EXPECT_EQ(0.5, ray.t1()); - EXPECT_EQ(ray(0.5)[0], 1.5); //lower y component of intersection - EXPECT_EQ(ray(0.5)[1], 2.0); //higher y component of intersection - - ray.reset(eye, dir, t0, t1); - // no intersections - EXPECT_TRUE(!ray.clip(CoordBBoxT(CoordT(4, 2, 2), CoordT(6, 4, 6)))); - EXPECT_EQ(t0, ray.t0()); - EXPECT_EQ(t1, ray.t1()); - } - {// clip ray against an real bbox - // test bbox clip - const Vec3T dir(-1.0, 2.0, 3.0); - const Vec3T eye(2.0, 1.0, 1.0); - RealT t0 = 0.1, t1 = 12589.0; - RayT ray(eye, dir, t0, t1); - - // intersects the two faces of the box perpendicular to the y-axis! 
- EXPECT_TRUE( ray.clip(CoordBBoxT(CoordT(0, 2, 2), CoordT(2, 4, 6)).asReal()) ); - //std::cerr << "t0 = " << ray.t0() << ", ray.t1() = " << ray.t1() << std::endl; - //std::cerr << "ray(0.5) = " << ray(0.5) << std::endl; - //std::cerr << "ray(1.5) = " << ray(1.5) << std::endl; - //std::cerr << "ray(2.0) = " << ray(2.0) << std::endl; - EXPECT_EQ(0.5, ray.t0()); - EXPECT_EQ(2.0, ray.t1()); - EXPECT_EQ(ray(0.5)[1], 2); //lower y component of intersection - EXPECT_EQ(ray(1.5)[1], 4); //higher y component of intersection - - ray.reset(eye, dir, t0, t1); - // intersects the lower edge along the z-axis of the box - EXPECT_TRUE( ray.clip(BBoxT(Vec3T(1.5, 2.0, 2.0), Vec3T(4.5, 4.0, 6.0))) ); - EXPECT_EQ(0.5, ray.t0()); - EXPECT_EQ(0.5, ray.t1()); - EXPECT_EQ(ray(0.5)[0], 1.5); //lower y component of intersection - EXPECT_EQ(ray(0.5)[1], 2.0); //higher y component of intersection - - ray.reset(eye, dir, t0, t1); - // no intersections - EXPECT_TRUE(!ray.clip(CoordBBoxT(CoordT(4, 2, 2), CoordT(6, 4, 6)).asReal()) ); - EXPECT_EQ(t0, ray.t0()); - EXPECT_EQ(t1, ray.t1()); - } -} - -TEST_F(Benchmark, HDDA) -{ - using RealT = float; - using CoordT = nanovdb::Coord; - using RayT = nanovdb::Ray; - using Vec3T = RayT::Vec3T; - - { // basic test - using DDAT = nanovdb::HDDA; - const RayT::Vec3T dir(1.0, 0.0, 0.0); - const RayT::Vec3T eye(-1.0, 0.0, 0.0); - const RayT ray(eye, dir); - DDAT dda(ray, 1 << (3 + 4 + 5)); - EXPECT_EQ(nanovdb::Delta::value(), dda.time()); - EXPECT_EQ(1.0, dda.next()); - dda.step(); - EXPECT_EQ(1.0, dda.time()); - EXPECT_EQ(4096 + 1.0, dda.next()); - } - { // Check for the notorious +-0 issue! 
- using DDAT = nanovdb::HDDA; - - const Vec3T dir1(1.0, 0.0, 0.0); - const Vec3T eye1(2.0, 0.0, 0.0); - const RayT ray1(eye1, dir1); - DDAT dda1(ray1, 1 << 3); - dda1.step(); - - const Vec3T dir2(1.0, -0.0, -0.0); - const Vec3T eye2(2.0, 0.0, 0.0); - const RayT ray2(eye2, dir2); - DDAT dda2(ray2, 1 << 3); - dda2.step(); - - const Vec3T dir3(1.0, -1e-9, -1e-9); - const Vec3T eye3(2.0, 0.0, 0.0); - const RayT ray3(eye3, dir3); - DDAT dda3(ray3, 1 << 3); - dda3.step(); - - const Vec3T dir4(1.0, -1e-9, -1e-9); - const Vec3T eye4(2.0, 0.0, 0.0); - const RayT ray4(eye3, dir4); - DDAT dda4(ray4, 1 << 3); - dda4.step(); - - EXPECT_EQ(dda1.time(), dda2.time()); - EXPECT_EQ(dda2.time(), dda3.time()); - EXPECT_EQ(dda3.time(), dda4.time()); - EXPECT_EQ(dda1.next(), dda2.next()); - EXPECT_EQ(dda2.next(), dda3.next()); - EXPECT_EQ(dda3.next(), dda4.next()); - } - { // test voxel traversal along both directions of each axis - using DDAT = nanovdb::HDDA; - const Vec3T eye(0, 0, 0); - for (int s = -1; s <= 1; s += 2) { - for (int a = 0; a < 3; ++a) { - const int d[3] = {s * (a == 0), s * (a == 1), s * (a == 2)}; - const Vec3T dir(d[0], d[1], d[2]); - RayT ray(eye, dir); - DDAT dda(ray, 1 << 0); - for (int i = 1; i <= 10; ++i) { - EXPECT_TRUE(dda.step()); - EXPECT_EQ(i, dda.time()); - } - } - } - } - { // test Node traversal along both directions of each axis - using DDAT = nanovdb::HDDA; - const Vec3T eye(0, 0, 0); - - for (int s = -1; s <= 1; s += 2) { - for (int a = 0; a < 3; ++a) { - const int d[3] = {s * (a == 0), s * (a == 1), s * (a == 2)}; - const Vec3T dir(d[0], d[1], d[2]); - RayT ray(eye, dir); - DDAT dda(ray, 1 << 3); - for (int i = 1; i <= 10; ++i) { - EXPECT_TRUE(dda.step()); - EXPECT_EQ(8 * i, dda.time()); - } - } - } - } - { // test accelerated Node traversal along both directions of each axis - using DDAT = nanovdb::HDDA; - const Vec3T eye(0, 0, 0); - - for (int s = -1; s <= 1; s += 2) { - for (int a = 0; a < 3; ++a) { - const int d[3] = {s * (a == 0), s * (a == 1), 
s * (a == 2)}; - const Vec3T dir(2 * d[0], 2 * d[1], 2 * d[2]); - RayT ray(eye, dir); - DDAT dda(ray, 1 << 3); - double next = 0; - for (int i = 1; i <= 10; ++i) { - EXPECT_TRUE(dda.step()); - EXPECT_EQ(4 * i, dda.time()); - if (i > 1) { - EXPECT_EQ(dda.time(), next); - } - next = dda.next(); - } - } - } - } -} // HDDA - - -TEST_F(Benchmark, DenseGrid) -{ - {// CoordT = nanovdb::Coord - using GridT = nanovdb::DenseGrid; - const nanovdb::Coord min(-10,0,10), max(10,20,30), pos(0,5,20); - const nanovdb::CoordBBox bbox( min, max ); - auto handle = GridT::create(min, max); - auto *grid = handle.grid(); - EXPECT_TRUE(grid); - EXPECT_TRUE(grid->test(min)); - EXPECT_TRUE(grid->test(max)); - EXPECT_TRUE(grid->test(pos)); - EXPECT_EQ( uint64_t(21*21*21), bbox.volume() ); - EXPECT_EQ( bbox.volume(), grid->size() ); - EXPECT_EQ( 0u, grid->coordToOffset(min) ); - float *p = grid->values(); - for (uint64_t i=0; isize(); ++i) { - *p++ = 0.0f; - } - EXPECT_EQ( 0.0f, grid->getValue(min) ); - EXPECT_EQ( 0.0f, grid->getValue(pos) ); - EXPECT_EQ( 0.0f, grid->getValue(max) ); - grid->setValue(pos, 1.0f); - EXPECT_EQ( 0.0f, grid->getValue(min) ); - EXPECT_EQ( 1.0f, grid->getValue(pos) ); - EXPECT_EQ( 0.0f, grid->getValue(max)); - for (auto it = bbox.begin(); it; ++it) { - auto &ijk = *it; - EXPECT_TRUE(grid->test(ijk)); - if (ijk == pos) { - EXPECT_EQ( 1.0f, grid->getValue(ijk) ); - } else { - EXPECT_EQ( 0.0f, grid->getValue(ijk) ); - } - } - EXPECT_EQ(nanovdb::GridType::Float, grid->gridType()); - EXPECT_EQ(nanovdb::GridClass::Unknown, grid->gridClass()); - EXPECT_EQ(bbox, grid->indexBBox()); - EXPECT_EQ(nanovdb::Vec3d(min[0], min[1], min[2]), grid->worldBBox()[0]); - EXPECT_EQ(nanovdb::Vec3d(max[0]+1, max[1]+1, max[2]+1), grid->worldBBox()[1]); - EXPECT_EQ(nanovdb::Vec3d(1.0), grid->voxelSize()); - nanovdb::io::writeDense(handle, "data/dense.vol"); - //nanovdb::io::writeDense(*grid, "data/dense.vol"); - } - {// CoordT = nanovdb::Coord - auto handle = 
nanovdb::io::readDense<>("data/dense.vol"); - const nanovdb::Coord min(-10,0,10), max(10,20,30), pos(0,5,20); - const nanovdb::CoordBBox bbox( min, max ); - auto *grid = handle.grid(); - EXPECT_TRUE(grid); - EXPECT_TRUE(grid->test(min)); - EXPECT_TRUE(grid->test(max)); - EXPECT_TRUE(grid->test(pos)); - EXPECT_EQ( uint64_t(21*21*21), bbox.volume() ); - EXPECT_EQ( bbox.volume(), grid->size() ); - EXPECT_EQ( 0u, grid->coordToOffset(min) ); - EXPECT_EQ( 0.0f, grid->getValue(min) ); - EXPECT_EQ( 1.0f, grid->getValue(pos) ); - EXPECT_EQ( 0.0f, grid->getValue(max) ); - EXPECT_EQ(min, grid->min()); - EXPECT_EQ(max, grid->max()); - for (auto it = bbox.begin(); it; ++it) { - auto &ijk = *it; - EXPECT_TRUE(grid->test(ijk)); - if (ijk == pos) { - EXPECT_EQ( 1.0f, grid->getValue(ijk) ); - } else { - EXPECT_EQ( 0.0f, grid->getValue(ijk) ); - } - } - EXPECT_EQ(nanovdb::GridType::Float, grid->gridType()); - EXPECT_EQ(nanovdb::GridClass::Unknown, grid->gridClass()); - EXPECT_EQ(bbox, grid->indexBBox()); - EXPECT_EQ(nanovdb::Vec3d(min[0], min[1], min[2]), grid->worldBBox()[0]); - EXPECT_EQ(nanovdb::Vec3d(max[0]+1, max[1]+1, max[2]+1), grid->worldBBox()[1]); - EXPECT_EQ(nanovdb::Vec3d(1.0), grid->voxelSize()); - } -} - -#if defined(NANOVDB_USE_OPENVDB) -TEST_F(Benchmark, OpenVDB_CPU) -{ - using GridT = openvdb::FloatGrid; - using CoordT = openvdb::Coord; - using ColorRGB = nanovdb::Image::ColorRGB; - using RealT = float; - using Vec3T = openvdb::math::Vec3; - using RayT = openvdb::math::Ray; - - const std::string image_path = this->getEnvVar("VDB_SCRATCH_PATH", "."); - - auto srcGrid = this->getSrcGrid(); - mTimer.start("Generating NanoVDB grid"); - auto handle = nanovdb::createNanoGrid(*srcGrid, nanovdb::StatsMode::BBox, nanovdb::ChecksumMode::Disable); - mTimer.restart("Writing NanoVDB grid"); -#if defined(NANOVDB_USE_BLOSC) - nanovdb::io::writeGrid("data/test.nvdb", handle, nanovdb::io::Codec::BLOSC); -#elif defined(NANOVDB_USE_ZIP) - nanovdb::io::writeGrid("data/test.nvdb", 
handle, nanovdb::io::Codec::ZIP); -#else - nanovdb::io::writeGrid("data/test.nvdb", handle, nanovdb::io::Codec::NONE); -#endif - mTimer.stop(); - - {// convert and write DenseGRid - mTimer.start("Generating DenseGrid"); - auto dHandle = nanovdb::convertToDense(*handle.grid()); - mTimer.restart("Writing DenseGrid"); - nanovdb::io::writeDense(dHandle, "data/test.vol"); - mTimer.stop(); - } - - const int width = 1280, height = 720; - const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; - const auto bbox = srcGrid->evalActiveVoxelBoundingBox(); - const openvdb::Vec3d center(0.5 * (bbox.max()[0] + bbox.min()[0]), - 0.5 * (bbox.max()[1] + bbox.min()[1]), - 0.5 * (bbox.max()[2] + bbox.min()[2])); - const Vec3T lookat = srcGrid->indexToWorld(center), up(0, -1, 0); - auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * openvdb::math::pi() / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - - nanovdb::Camera camera(eye(0), lookat, up, vfov, aspect); - - nanovdb::ImageHandle<> imgHandle(width, height); - auto* img = imgHandle.image(); - - auto kernel2D = [&](const tbb::blocked_range2d& r) { - openvdb::tools::LevelSetRayIntersector, GridT::TreeType::RootNodeType::ChildNodeType::LEVEL, RayT> tester(*srcGrid); - const RealT wScale = 1.0f / width, hScale = 1.0f / height; - auto acc = srcGrid->getAccessor(); - CoordT ijk; - Vec3T xyz; - float v; - for (int w = r.rows().begin(); w != r.rows().end(); ++w) { - for (int h = r.cols().begin(); h != r.cols().end(); ++h) { - const RayT wRay = camera.getRay(w * wScale, h * hScale); - RayT iRay = wRay.applyInverseMap(*srcGrid->transform().baseMap()); - if (tester.intersectsIS(iRay, xyz)) { - ijk = openvdb::Coord::floor(xyz); - v = acc.getValue(ijk); - Vec3T grad(-v); - ijk[0] += 1; - grad[0] += acc.getValue(ijk); - ijk[0] -= 1; - ijk[1] += 1; - grad[1] += acc.getValue(ijk); - ijk[1] -= 1; - ijk[2] += 1; - grad[2] += acc.getValue(ijk); - grad.normalize(); - (*img)(w, h) = 
ColorRGB(std::abs(grad.dot(iRay.dir())), 0, 0); - } else { - const int checkerboard = 1 << 7; - (*img)(w, h) = ((h & checkerboard) ^ (w & checkerboard)) ? ColorRGB(1, 1, 1) : ColorRGB(0, 0, 0); - } - } - } - }; // kernel - - for (int angle = 0; angle < 6; ++angle) { - camera.update(eye(angle), lookat, up, vfov, aspect); - std::stringstream ss; - ss << "OpenVDB: CPU kernel with " << img->size() << " rays"; - tbb::blocked_range2d range2D(0, img->width(), 0, img->height()); - mTimer.start(ss.str()); -#if 1 - tbb::parallel_for(range2D, kernel2D); -#else - kernel2D(range2D); -#endif - mTimer.stop(); - //mTimer.start("Write image to file"); - ss.str(""); - ss.clear(); - ss << image_path << "/openvdb_cpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); - //mTimer.stop(); - } // loop over angle -} // OpenVDB_CPU -#endif// NANOVDB_USE_OPENVDB - -TEST_F(Benchmark, DenseGrid_CPU) -{ - using CoordT = nanovdb::Coord; - using ColorRGB = nanovdb::Image::ColorRGB; - using RealT = float; - using Vec3T = nanovdb::Vec3; - using RayT = nanovdb::Ray; - - auto handle = nanovdb::io::readDense("data/test.vol"); - auto* grid = handle.grid(); - EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLevelSet()); - - const int width = 1280, height = 720; - const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; - const auto bbox = grid->worldBBox(); - const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); - auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * RealT(3.14159265358979323846) / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - - nanovdb::Camera camera(eye(0), lookat, up, vfov, aspect); - - nanovdb::ImageHandle<> imgHandle(width, height); - auto* img = imgHandle.image(); - - auto kernel2D = [&](int x0, int y0, int x1, int y1) { - const RealT wScale = 1.0f / width, hScale = 1.0f / height; - for (int w = x0; w != x1; ++w) { - for (int h = y0; h != y1; ++h) { - RayT 
ray = camera.getRay(w * wScale, h * hScale); - ray = ray.worldToIndexF(*grid); - if (!ray.clip(grid->indexBBox().expandBy(-1))) continue; - nanovdb::DDA dda(ray); - CoordT ijk = dda.voxel(); - EXPECT_TRUE(grid->test(ijk)); - const float v0 = grid->getValue(ijk); - bool hit = false; - while( !hit && dda.step() ) { - ijk = dda.voxel(); - EXPECT_TRUE(grid->test(ijk)); - const float v1 = grid->getValue(ijk); - if (v0*v1>0) continue; - Vec3T grad(-v1); - ijk[0] += 1; - grad[0] += grid->getValue(ijk); - ijk[0] -= 1; - ijk[1] += 1; - grad[1] += grid->getValue(ijk); - ijk[1] -= 1; - ijk[2] += 1; - grad[2] += grid->getValue(ijk); - grad.normalize(); - (*img)(w, h) = ColorRGB(std::abs(grad.dot(ray.dir())), 0, 0); - hit = true; - } - if (!hit) { - const int checkerboard = 1 << 7; - (*img)(w, h) = ((h & checkerboard) ^ (w & checkerboard)) ? ColorRGB(1, 1, 1) : ColorRGB(0, 0, 0); - } - } - } - }; // kernel - - for (int angle = 0; angle < 6; ++angle) { - camera.update(eye(angle), lookat, up, vfov, aspect); - std::stringstream ss; - ss << "DenseGrid: CPU kernel with " << img->size() << " rays"; - mTimer.start(ss.str()); -#if defined(NANOVDB_USE_TBB) - tbb::blocked_range2d range(0, img->width(), 0, img->height()); - tbb::parallel_for(range, [&](const tbb::blocked_range2d& r) { - kernel2D(r.rows().begin(), r.cols().begin(), r.rows().end(), r.cols().end()); - }); -#else - kernel2D(0, 0, img->width(), img->height()); -#endif - mTimer.stop(); - //mTimer.start("Write image to file"); - ss.str(""); - ss.clear(); - ss << "./dense_cpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); - //mTimer.stop(); - } // loop over angle -} // DenseGrid_CPU - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu b/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu deleted file mode 100644 index cc96fb9648..0000000000 --- 
a/nanovdb/nanovdb/examples/benchmark/TestBenchmark.cu +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/// @file TestBenchmark.cu -/// -/// @author Ken Museth -/// -/// @brief A simple ray-tracing benchmark test. - -#include // io::readGrid -#include // createLevelSetTorus -#include "Image.h" -#include "Camera.h" -#include -#include - -#include - -extern "C" void launch_kernels(const nanovdb::GridHandle&, - nanovdb::ImageHandle&, - const nanovdb::Camera*, - cudaStream_t stream); - -std::string getEnvVar(const std::string& name, const std::string def = "") -{ - const char* str = std::getenv(name.c_str()); - return str == nullptr ? def : std::string(str); -} - -TEST(TestBenchmark, NanoVDB_GPU) -{ - using BufferT = nanovdb::CudaDeviceBuffer; - using RealT = float; - using Vec3T = nanovdb::Vec3; - using CameraT = nanovdb::Camera; - nanovdb::CpuTimer timer; - - const std::string image_path = getEnvVar("VDB_SCRATCH_PATH", "."); - - // The first CUDA run time call initializes the CUDA sub-system (loads the runtime API) which takes time! 
- int deviceCount; - cudaGetDeviceCount(&deviceCount); - for (int device = 0; device < deviceCount; ++device) { - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, device); - printf("Device %d has compute capability %d.%d.\n", - device, - deviceProp.major, - deviceProp.minor); - } - cudaSetDevice(0); - - cudaStream_t stream; - cudaCheck(cudaStreamCreate(&stream)); - -#if defined(NANOVDB_USE_OPENVDB) - auto handle = nanovdb::io::readGrid("data/test.nvdb"); -#else - auto handle = nanovdb::createLevelSetTorus(100.0f, 50.0f); -#endif - //auto handle = nanovdb::io::readGrid("data/test.nvdb"); - const auto* grid = handle.grid(); - EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLevelSet()); - EXPECT_FALSE(grid->isFogVolume()); - handle.deviceUpload(stream, false); - EXPECT_TRUE(handle.deviceGrid()); - - std::cout << "\nRay-tracing NanoVDB grid named \"" << grid->gridName() << "\"" << std::endl; - - const int width = 1280, height = 720; - const RealT vfov = 25.0f, aspect = RealT(width) / height, radius = 300.0f; - const auto bbox = grid->worldBBox(); - const Vec3T lookat(0.5 * (bbox.min() + bbox.max())), up(0, -1, 0); - auto eye = [&lookat, &radius](int angle) { - const RealT theta = angle * nanovdb::pi() / 180.0f; - return lookat + radius * Vec3T(sin(theta), 0, cos(theta)); - }; - CameraT *host_camera, *dev_camera; - cudaCheck(cudaMalloc((void**)&dev_camera, sizeof(CameraT))); // un-managed memory on the device - cudaCheck(cudaMallocHost((void**)&host_camera, sizeof(CameraT))); - - nanovdb::ImageHandle imgHandle(width, height); - auto* img = imgHandle.image(); - imgHandle.deviceUpload(stream, false); - - for (int angle = 0; angle < 6; ++angle) { - std::stringstream ss; - ss << "NanoVDB: GPU kernel with " << img->size() << " rays"; - host_camera->update(eye(angle), lookat, up, vfov, aspect); - cudaCheck(cudaMemcpyAsync(dev_camera, host_camera, sizeof(CameraT), cudaMemcpyHostToDevice, stream)); - timer.start(ss.str()); - launch_kernels(handle, imgHandle, 
dev_camera, stream);// defined in BenchKernels_nano.cu - timer.stop(); - - //timer.start("Write image to file"); - imgHandle.deviceDownload(stream); - ss.str(""); - ss.clear(); - ss << image_path << "/nanovdb_gpu_" << std::setfill('0') << std::setw(3) << angle << ".ppm"; - img->writePPM(ss.str(), "Benchmark test"); - //timer.stop(); - - } //frame number angle - - cudaCheck(cudaStreamDestroy(stream)); - cudaCheck(cudaFreeHost(host_camera)); - cudaCheck(cudaFree(dev_camera)); -} // NanoVDB_GPU \ No newline at end of file From a901aeb38253365801cddb7c59968e669583b02e Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 11 Oct 2023 15:07:25 -0700 Subject: [PATCH 26/49] another major commit, e.g. added lots of new cuda tools in util/cuda Signed-off-by: Ken Museth --- nanovdb/nanovdb/CNanoVDB.h | 2 +- nanovdb/nanovdb/NanoVDB.h | 594 +- nanovdb/nanovdb/PNanoVDB.h | 6767 +++++++++-------- nanovdb/nanovdb/cmd/print/nanovdb_print.cc | 4 +- .../nanovdb/cmd/validate/nanovdb_validate.cc | 2 +- .../index_grid_cuda_kernel.cu | 2 +- .../ex_nodemanager_cuda/nodemanager_cuda.cc | 11 +- .../nodemanager_cuda_kernel.cu | 6 + nanovdb/nanovdb/unittest/TestNanoVDB.cc | 424 +- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 337 +- nanovdb/nanovdb/unittest/TestOpenVDB.cc | 4 - nanovdb/nanovdb/util/CreateNanoGrid.h | 22 +- nanovdb/nanovdb/util/GridChecksum.h | 509 +- nanovdb/nanovdb/util/GridHandle.h | 201 +- nanovdb/nanovdb/util/GridStats.h | 201 +- nanovdb/nanovdb/util/GridValidator.h | 12 +- nanovdb/nanovdb/util/IO.h | 625 +- nanovdb/nanovdb/util/NodeManager.h | 72 +- .../nanovdb/util/cuda/CudaAddBlindData.cuh | 52 +- nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h | 34 +- .../nanovdb/util/cuda/CudaGridChecksum.cuh | 244 + nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh | 46 +- nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh | 63 +- nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh | 90 + .../nanovdb/util/cuda/CudaPointsToGrid.cuh | 427 +- .../nanovdb/util/cuda/CudaSignedFloodFill.cuh | 42 +- 
.../util/cuda/{GpuTimer.cuh => GpuTimer.h} | 50 +- pendingchanges/nanovdb.txt | 12 +- 28 files changed, 6236 insertions(+), 4619 deletions(-) create mode 100644 nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh create mode 100644 nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh rename nanovdb/nanovdb/util/cuda/{GpuTimer.cuh => GpuTimer.h} (61%) diff --git a/nanovdb/nanovdb/CNanoVDB.h b/nanovdb/nanovdb/CNanoVDB.h index db3802a4aa..a3d8873e7e 100644 --- a/nanovdb/nanovdb/CNanoVDB.h +++ b/nanovdb/nanovdb/CNanoVDB.h @@ -687,7 +687,7 @@ cnanovdb_griddata_valid(const CNANOVDB_GLOBAL cnanovdb_griddata *RESTRICT grid) { if (!grid) return 0; - if (grid->mMagic != 0x304244566f6e614eUL) + if (grid->mMagic != 0x304244566f6e614eUL && grid->mMagic != 0x314244566f6e614eUL) return 0; return 1; } diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 6c5aad2a3c..4b099f2913 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -102,12 +102,12 @@ Notation: "]---[" implies it has optional padding, and "][" implies zero padding - [GridData(672B)][TreeData(64B)]---[RootData][N x Root::Tile]---[NodeData<5>]---[ModeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. - ^ ^ ^ ^ ^ ^ - | | | | | | - +-- Start of 32B aligned buffer | | | | +-- Node0::DataType* leafData - GridType::DataType* gridData | | | | - | | | +-- Node1::DataType* lowerData + [GridData(672B)][TreeData(64B)]---[RootData][N x Root::Tile]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. 
+ ^ ^ ^ ^ ^ ^ + | | | | | | + +-- Start of 32B aligned buffer | | | | +-- Node0::DataType* leafData + GridType::DataType* gridData | | | | + | | | +-- Node1::DataType* lowerData RootType::DataType* rootData --+ | | | +-- Node2::DataType* upperData | @@ -118,20 +118,27 @@ #ifndef NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED #define NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED +// NANOVDB_MAGIC_NUMBER is currently used for both grids and files (starting with v32.6.0) +// NANOVDB_MAGIC_GRID will soon be used exclusively for grids +// NANOVDB_MAGIC_FILE will soon be used exclusively for files +// NANOVDB_MAGIC_NODE will soon be used exclusively for NodeManager +// | : 0 in 30 corresponds to 0 in NanoVDB0 #define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_GRID 0x314244566f6e614eUL // "NanoVDB1" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_FILE 0x324244566f6e614eUL // "NanoVDB2" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_NODE 0x334244566f6e614eUL // "NanoVDB3" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_MASK 0x00FFFFFFFFFFFFFFUL // use this mask to remove the number +//#define NANOVDB_USE_NEW_MAGIC_NUMBERS// used to enable use of the new magic numbers described above #define NANOVDB_MAJOR_VERSION_NUMBER 32 // reflects changes to the ABI and hence also the file format -#define NANOVDB_MINOR_VERSION_NUMBER 5 // reflects changes to the API but not ABI -#define NANOVDB_PATCH_VERSION_NUMBER 1 // reflects changes that does not affect the ABI or API +#define NANOVDB_MINOR_VERSION_NUMBER 6 // reflects changes to the API but not ABI +#define NANOVDB_PATCH_VERSION_NUMBER 0 // reflects changes that does not affect the ABI or API #define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 // This replaces a Coord key at the root level with a single uint64_t #define NANOVDB_USE_SINGLE_ROOT_KEY -// This allows for the old (deprecated) indexing scheme for ValueOnIndex -//#define NANOVDB_USE_OLD_VALUE_ON_INDEX - 
// This replaces three levels of Coord keys in the ReadAccessor with one Coord //#define NANOVDB_USE_SINGLE_ACCESSOR_KEY @@ -143,6 +150,7 @@ #define NANOVDB_FPN_BRANCHLESS +// Do not change this value! 32 byte alignment is fixed in NanoVDB #define NANOVDB_DATA_ALIGNMENT 32 #if !defined(NANOVDB_ALIGN) @@ -335,7 +343,9 @@ enum class GridType : uint32_t { Unknown = 0, // unknown value type - should ra End = 26 }; // should never be used #ifndef __CUDACC_RTC__ -/// @brief Retuns a c-string used to describe a GridType +/// @brief Maps a GridType to a c-string +/// @param gridType GridType to be mapped to a string +/// @return Retuns a c-string used to describe a GridType inline const char* toStr(GridType gridType) { static const char* LUT[] = {"?", "float", "double", "int16", "int32", "int64", "Vec3f", "Vec3d", "Mask", "Half", @@ -380,8 +390,9 @@ enum class GridFlags : uint32_t { HasMinMax = 1 << 2, // nodes contain min/max of active values HasAverage = 1 << 3, // nodes contain averages of active values HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values - IsBreadthFirst = 1 << 5, // nodes are arranged breadth-first in memory - End = 1 << 6, // use End - 1 as a mask for the 5 lower bit flags + IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory + IsLexicographic = 1 << 6, // nodes are occasionally arranged lexicographically in memory + End = 1 << 7, // use End - 1 as a mask for the 5 lower bit flags }; #ifndef __CUDACC_RTC__ @@ -394,6 +405,7 @@ inline const char* toStr(GridFlags gridFlags) "has average", "has standard deviation", "is breadth-first", + "is IsLexicographic", "end"}; static_assert(1 << (sizeof(LUT) / sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT"); return LUT[static_cast(gridFlags)]; @@ -516,6 +528,32 @@ struct is_const static constexpr bool value = true; }; +// --------------------------> is_pointer <------------------------------------ + +/// @brief Trait used to identify 
template parameter that are pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer +{ + static constexpr bool value = false; +}; + +/// @brief Template specialization of non-const pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer +{ + static constexpr bool value = true; +}; + +/// @brief Template specialization of const pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer +{ + static constexpr bool value = true; +}; + // --------------------------> remove_const <------------------------------------ template @@ -530,6 +568,22 @@ struct remove_const using type = T; }; +// --------------------------> remove_reference <------------------------------------ + +template +struct remove_reference {using type = T;}; + +template +struct remove_reference {using type = T;}; + +// --------------------------> remove_pointer <------------------------------------ + +template +struct remove_pointer {using type = T;}; + +template +struct remove_pointer {using type = T;}; + // --------------------------> match_const <------------------------------------ template @@ -746,9 +800,18 @@ class Rgba8 static const int SIZE = 4; using ValueType = uint8_t; + /// @brief Default copy constructor Rgba8(const Rgba8&) = default; + + /// @brief Default move constructor Rgba8(Rgba8&&) = default; + + /// @brief Default move assignment operator + /// @return non-const reference to this instance Rgba8& operator=(Rgba8&&) = default; + + /// @brief Default copy assignment operator + /// @return non-const reference to this instance Rgba8& operator=(const Rgba8&) = default; /// @brief Default ctor initializes all channels to zero @@ -929,6 +992,7 @@ class Version uint32_t(NANOVDB_PATCH_VERSION_NUMBER)) { } + __hostdev__ Version(uint32_t data) : mData(data) {} __hostdev__ Version(uint32_t major, uint32_t minor, uint32_t patch) : mData(major << 21 | minor << 10 | patch) { @@ -937,20 +1001,25 @@ class Version 
NANOVDB_ASSERT(patch < (1u << 10)); // max value of patch is 1023 } __hostdev__ bool operator==(const Version& rhs) const { return mData == rhs.mData; } - __hostdev__ bool operator<(const Version& rhs) const { return mData < rhs.mData; } + __hostdev__ bool operator<( const Version& rhs) const { return mData < rhs.mData; } __hostdev__ bool operator<=(const Version& rhs) const { return mData <= rhs.mData; } - __hostdev__ bool operator>(const Version& rhs) const { return mData > rhs.mData; } + __hostdev__ bool operator>( const Version& rhs) const { return mData > rhs.mData; } __hostdev__ bool operator>=(const Version& rhs) const { return mData >= rhs.mData; } __hostdev__ uint32_t id() const { return mData; } __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1); } __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1); } __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1); } + __hostdev__ bool isCompatible() const { return this->getMajor() == uint32_t(NANOVDB_MAJOR_VERSION_NUMBER);} + /// @brief Check the major version of this instance relative to NANOVDB_MAJOR_VERSION_NUMBER + /// @return return 0 if the major version equals NANOVDB_MAJOR_VERSION_NUMBER, else a negative age if it is + /// older, i.e. smaller, and a positive age if it's newer, i.e.e larger. 
+ __hostdev__ int age() const {return int(this->getMajor()) - int(NANOVDB_MAJOR_VERSION_NUMBER);} #ifndef __CUDACC_RTC__ const char* c_str() const { char* buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1); // xxxx.xxxx.xxxx\0 - snprintf(buffer, 4 + 1 + 4 + 1 + 4 + 1, "%d.%d.%d", this->getMajor(), this->getMinor(), this->getPatch()); // Prevents overflows by enforcing a fixed size of buffer + snprintf(buffer, 4 + 1 + 4 + 1 + 4 + 1, "%u.%u.%u", this->getMajor(), this->getMinor(), this->getPatch()); // Prevents overflows by enforcing a fixed size of buffer return buffer; } #endif @@ -1801,7 +1870,7 @@ __hostdev__ inline Vec4 operator*(T1 scalar, const Vec4& vec) return Vec4(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]); } template -__hostdev__ inline Vec4 operator/(T1 scalar, const Vec3& vec) +__hostdev__ inline Vec4 operator/(T1 scalar, const Vec4& vec) { return Vec4(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]); } @@ -1981,9 +2050,9 @@ __hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass:: template __hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])), - fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])), - fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops } /// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetics @@ -2010,9 +2079,9 @@ __hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) template __hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const 
Vec3T& xyz) { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], fmaf(xyz[2], mat[2], vec[0]))), - fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[5], vec[1]))), - fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], fmaf(static_cast(xyz[2]), mat[2], vec[0]))), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[5], vec[1]))), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops } /// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetics @@ -2039,9 +2108,9 @@ __hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec template __hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], xyz[2] * mat[6])), - fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], xyz[2] * mat[7])), - fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops } /// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetics @@ -2061,9 +2130,9 @@ __hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) template __hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz) { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], fmaf(xyz[2], mat[6], vec[0]))), - fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[7], vec[1]))), 
- fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], fmaf(static_cast(xyz[2]), mat[6], vec[0]))), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[7], vec[1]))), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops } template @@ -2089,7 +2158,7 @@ struct BaseBBox __hostdev__ Vec3T& max() { return mCoord[1]; } __hostdev__ const Vec3T& min() const { return mCoord[0]; } __hostdev__ const Vec3T& max() const { return mCoord[1]; } - __hostdev__ Coord& translate(const Vec3T& xyz) + __hostdev__ BaseBBox& translate(const Vec3T& xyz) { mCoord[0] += xyz; mCoord[1] += xyz; @@ -2155,8 +2224,9 @@ struct BBox : public BaseBBox static_assert(is_floating_point::value, "Expected a floating point coordinate type"); using BaseT = BaseBBox; using BaseT::mCoord; + /// @brief Default construction sets BBox to an empty bbox __hostdev__ BBox() - : BaseT(Vec3T(Maximum::value()), + : BaseT(Vec3T( Maximum::value()), Vec3T(-Maximum::value())) { } @@ -2482,7 +2552,7 @@ __hostdev__ inline uint32_t CountOn(uint64_t v) // __popcnt64 intrinsic support was added in VS 2019 16.8 #elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) //#warning Using popcnt64 for CountOn - return __popcnt64(v); + return uint32_t(__popcnt64(v)); #elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) //#warning Using builtin_popcountll for CountOn return __builtin_popcountll(v); @@ -3314,7 +3384,7 @@ struct GetNodeInfo; N0 LeafNodes each with a bit mask, N0 ValueTypes and min/max Example layout: ("---" implies it has a custom offset, "..." 
implies zero or more) - [GridData][TreeData]---[RootData][ROOT TILES...]---[NodeData<5>]---[ModeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. + [GridData][TreeData]---[RootData][ROOT TILES...]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. */ /// @brief Struct with all the member data of the Grid (useful during serialization of an openvdb grid) @@ -3358,8 +3428,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData GridType gridType = GridType::Unknown, GridClass gridClass = GridClass::Unknown) { +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + mMagic = NANOVDB_MAGIC_GRID; +#else mMagic = NANOVDB_MAGIC_NUMBER; - mChecksum = 0u; +#endif + mChecksum = ~uint64_t(0);// all 64 bits ON means checksum is disabled mVersion = Version(); mFlags.initMask(list); mGridIndex = 0u; @@ -3367,7 +3441,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData mGridSize = gridSize; mGridName[0] = '\0'; mMap = map; - mWorldBBox = BBox(); + mWorldBBox = BBox();// invalid bbox mVoxelSize = map.getVoxelSize(); mGridClass = gridClass; mGridType = gridType; @@ -3378,7 +3452,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData mData2 = 0u; } // Set and unset various bit flags - __hostdev__ bool isValid() const { return mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ bool isValid() const { + return mMagic == NANOVDB_MAGIC_GRID || (mMagic == NANOVDB_MAGIC_NUMBER && mVersion.isCompatible()); + } __hostdev__ void setMinMaxOn(bool on = true) { mFlags.setMask(GridFlags::HasMinMax, on); } __hostdev__ void setBBoxOn(bool on = true) { mFlags.setMask(GridFlags::HasBBox, on); } __hostdev__ void setLongGridNameOn(bool on = true) { mFlags.setMask(GridFlags::HasLongGridName, on); } @@ -3416,11 +3492,29 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData template __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); } - // @brief Return a non-const void pointer to the tree - __hostdev__ void* 
treePtr() { return this + 1; } + // @brief Return a non-const uint8_t pointer to the tree + __hostdev__ uint8_t* treePtr() { return reinterpret_cast(this + 1); }// TreeData is always right after GridData - // @brief Return a const void pointer to the tree - __hostdev__ const void* treePtr() const { return this + 1; } + // @brief Return a const uint8_t pointer to the tree + __hostdev__ const uint8_t* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData + + /// @brief Return a non-const uint8_t pointer to the firsr node at @c LEVEL + /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + /// @warning If not nodes exist at @c LEVEL NULL is returned + template + __hostdev__ const uint8_t* nodePtr() const + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + auto *treeData = this->treePtr(); + auto nodeOffset = *reinterpret_cast(treeData + 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return a non-const uint8_t pointer to the firsr node at @c LEVEL + /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + /// @warning If not nodes exist at @c LEVEL NULL is returned + template + __hostdev__ uint8_t* nodePtr(){return const_cast(const_cast(this)->template nodePtr());} /// @brief Returns a const reference to the blindMetaData at the specified linear offset. 
/// @@ -3431,6 +3525,35 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData return PtrAdd(this, mBlindMetadataOffset) + n; } + __hostdev__ const char* gridName() const + { + if (mFlags.isMaskOn(GridFlags::HasLongGridName)) {// search for first blind meta data that contains a name + NANOVDB_ASSERT(mBlindMetadataCount > 0); + for (uint32_t i = 0; i < mBlindMetadataCount; ++i) { + const auto* metaData = this->blindMetaData(i);// EXTREMELY important to be a pointer + if (metaData->mDataClass == GridBlindDataClass::GridName) { + NANOVDB_ASSERT(metaData->mDataType == GridType::Unknown); + return metaData->template getBlindData(); + } + } + NANOVDB_ASSERT(false); // should never hit this! + } + return mGridName; + } + + /// @brief Return memory usage in bytes for this class only. + __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + + /// @brief return AABB of active values in world space + __hostdev__ const BBox& worldBBox() const { return mWorldBBox; } + + /// @brief return AABB of active values in index space + __hostdev__ const CoordBBox& indexBBox() const {return *(const CoordBBox*)(this->nodePtr<3>());} + + /// @brief test if the grid is empty, e.i the root table has size 0 + /// @return true if this grid contains not data whatsoever + __hostdev__ bool isEmpty() const {return *(const uint32_t*)(this->nodePtr<3>() + sizeof(CoordBBox)) == 0u;} + }; // GridData // Forward declaration of accelerated random access class @@ -3474,7 +3597,7 @@ class Grid : public GridData __hostdev__ const DataType* data() const { return reinterpret_cast(this); } /// @brief Return memory usage in bytes for this class only. - __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + //__hostdev__ static uint64_t memUsage() { return sizeof(GridData); } /// @brief Return the memory footprint of the entire grid, i.e. 
including all nodes and blind data __hostdev__ uint64_t gridSize() const { return DataType::mGridSize; } @@ -3561,13 +3684,13 @@ class Grid : public GridData __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); } /// @brief Computes a AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } + //__hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } /// @brief Computes a AABB of active values in index space /// /// @note This method is returning a floating point bounding box and not a CoordBBox. This makes /// it more useful for clipping rays. - __hostdev__ const BBox& indexBBox() const { return this->tree().bbox(); } + //__hostdev__ const BBox& indexBBox() const { return this->tree().bbox(); } /// @brief Return the total number of active voxels in this tree. __hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); } @@ -3590,6 +3713,7 @@ class Grid : public GridData __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + __hostdev__ bool isLexicographic() const { return DataType::mFlags.isMaskOn(GridFlags::IsLexicographic); } /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. 
@@ -3604,27 +3728,7 @@ class Grid : public GridData __hostdev__ bool isSequential() const { return UpperNodeType::FIXED_SIZE && LowerNodeType::FIXED_SIZE && LeafNodeType::FIXED_SIZE && this->isBreadthFirst(); } /// @brief Return a c-string with the name of this grid - __hostdev__ const char* gridName() const - { - if (this->hasLongGridName()) { - NANOVDB_ASSERT(DataType::mBlindMetadataCount > 0); -#if 1// search for first blind meta data that contains a name - for (uint32_t i = 0; i < DataType::mBlindMetadataCount; ++i) { - const auto& metaData = this->blindMetaData(i);// EXTREMELY important to be a reference - if (metaData.mDataClass == GridBlindDataClass::GridName) { - NANOVDB_ASSERT(metaData.mDataType == GridType::Unknown); - return metaData.template getBlindData(); - } - } - NANOVDB_ASSERT(false); // should never hit this! -#else// this assumes that the long grid name is always the last blind meta data - const auto& metaData = this->blindMetaData(DataType::mBlindMetadataCount - 1); // always the last - NANOVDB_ASSERT(metaData.mDataClass == GridBlindDataClass::GridName); - return metaData.template getBlindData(); -#endif - } - return DataType::mGridName; - } + __hostdev__ const char* gridName() const { return DataType::gridName(); } /// @brief Return a c-string with the name of this grid, truncated to 255 characters __hostdev__ const char* shortGridName() const { return DataType::mGridName; } @@ -3633,7 +3737,7 @@ class Grid : public GridData __hostdev__ uint64_t checksum() const { return DataType::mChecksum; } /// @brief Return true if this grid is empty, i.e. contains no values or nodes. 
- __hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } + //__hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } /// @brief Return the count of blind-data encoded in this grid __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; } @@ -3733,7 +3837,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData { mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0; } -}; + + __hostdev__ bool isEmpty() const {return *PtrAdd(this, mNodeOffset[3] + sizeof(BBox)) == 0;} + + /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ const CoordBBox& bbox() const {return *PtrAdd(this, mNodeOffset[3]);} +};// TreeData // ----------------------------> GridTree <-------------------------------------- @@ -3806,7 +3915,7 @@ class Tree : public TreeData __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); } /// @brief Return true if this tree is empty, i.e. contains no values or nodes - __hostdev__ bool isEmpty() const { return this->root().isEmpty(); } + //__hostdev__ bool isEmpty() const { return this->root().isEmpty(); } /// @brief Combines the previous two methods in a single call __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->root().probeValue(ijk, v); } @@ -3818,7 +3927,7 @@ class Tree : public TreeData __hostdev__ void extrema(ValueType& min, ValueType& max) const; /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree - __hostdev__ const BBox& bbox() const { return this->root().bbox(); } + //__hostdev__ const BBox& bbox() const { return this->root().bbox(); } /// @brief Return the total number of active voxels in this tree. 
__hostdev__ uint64_t activeVoxelCount() const { return DataType::mVoxelCount; } @@ -3847,6 +3956,11 @@ class Tree : public TreeData return DataType::mNodeCount[level]; } + __hostdev__ uint32_t totalNodeCount() const + { + return DataType::mNodeCount[0] + DataType::mNodeCount[1] + DataType::mNodeCount[2]; + } + /// @brief return a pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist @@ -4181,8 +4295,8 @@ class RootNode : public RootData using ChildIterator = ChildIter; using ConstChildIterator = ChildIter; - ChildIterator beginChild() { return ChildIterator(this); } - ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } template class ValueIter : public BaseIter @@ -4230,8 +4344,8 @@ class RootNode : public RootData using ValueIterator = ValueIter; using ConstValueIterator = ValueIter; - ValueIterator beginValue() { return ValueIterator(this); } - ConstValueIterator cbeginValueAll() const { return ConstValueIterator(this); } + __hostdev__ ValueIterator beginValue() { return ValueIterator(this); } + __hostdev__ ConstValueIterator cbeginValueAll() const { return ConstValueIterator(this); } template class ValueOnIter : public BaseIter @@ -4274,8 +4388,8 @@ class RootNode : public RootData using ValueOnIterator = ValueOnIter; using ConstValueOnIterator = ValueOnIter; - ValueOnIterator beginValueOn() { return ValueOnIterator(this); } - ConstValueOnIterator cbeginValueOn() const { return ConstValueOnIterator(this); } + __hostdev__ ValueOnIterator beginValueOn() { return ValueOnIterator(this); } + __hostdev__ ConstValueOnIterator cbeginValueOn() const { return ConstValueOnIterator(this); } template class DenseIter : public BaseIter @@ -4327,9 +4441,9 @@ class RootNode : public RootData using DenseIterator = DenseIter; using 
ConstDenseIterator = DenseIter; - DenseIterator beginDense() { return DenseIterator(this); } - ConstDenseIterator cbeginDense() const { return ConstDenseIterator(this); } - ConstDenseIterator cbeginChildAll() const { return ConstDenseIterator(this); } + __hostdev__ DenseIterator beginDense() { return DenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginDense() const { return ConstDenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginChildAll() const { return ConstDenseIterator(this); } /// @brief This class cannot be constructed or deleted RootNode() = delete; @@ -4697,10 +4811,17 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif /// @brief This class cannot be constructed or deleted InternalData() = delete; @@ -4770,7 +4891,7 @@ class InternalNode : public InternalData } }; // Member class ChildIterator - ChildIterator beginChild() const { return ChildIterator(this); } + __hostdev__ ChildIterator beginChild() const { return ChildIterator(this); } /// @brief Visits all tile values in this node, i.e. 
both inactive and active tiles class ValueIterator : public MaskIterT @@ -4807,8 +4928,8 @@ class InternalNode : public InternalData } }; // Member class ValueIterator - ValueIterator beginValue() const { return ValueIterator(this); } - ValueIterator cbeginValueAll() const { return ValueIterator(this); } + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } /// @brief Visits active tile values of this node only class ValueOnIterator : public MaskIterT @@ -4840,8 +4961,8 @@ class InternalNode : public InternalData } }; // Member class ValueOnIterator - ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } - ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } /// @brief Visits all tile values and child nodes of this node class DenseIterator : public Mask::DenseIterator @@ -4884,8 +5005,8 @@ class InternalNode : public InternalData } }; // Member class DenseIterator - DenseIterator beginDense() const { return DenseIterator(this); } - DenseIterator cbeginChildAll() const { return DenseIterator(this); } // matches openvdb + __hostdev__ DenseIterator beginDense() const { return DenseIterator(this); } + __hostdev__ DenseIterator cbeginChildAll() const { return DenseIterator(this); } // matches openvdb /// @brief This class cannot be constructed or deleted InternalNode() = delete; @@ -5578,16 +5699,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafIndexBase uint8_t mBBoxDif[3]; // 3B. uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. 
-#ifdef NANOVDB_USE_OLD_VALUE_ON_INDEX - uint64_t mOffset; // 8B offset to first value in this leaf node - union - { - uint8_t mCountOn[8]; - uint64_t mPrefixSum; - }; // prefix sum of active values per 64 bit words -#else uint64_t mOffset, mPrefixSum; // 8B offset to first value in this leaf node and 9-bit prefix sum -#endif __hostdev__ static constexpr uint32_t padding() { return sizeof(LeafIndexBase) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); @@ -5642,11 +5754,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData> 6) & uint32_t(1)) << 8) + CountOn(BaseT::mValueMask.words()[7]); -#else return CountOn(BaseT::mValueMask.words()[7]) + (BaseT::mPrefixSum >> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask -#endif } __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + this->valueCount() - 1u; } __hostdev__ uint64_t getMin() const { return this->hasStats() ? this->lastOffset() + 1u : 0u; } @@ -5655,23 +5763,14 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafDatahasStats() ? this->lastOffset() + 4u : 0u; } __hostdev__ uint64_t getValue(uint32_t i) const { -#if 0 // just for debugging - return mValueMask.isOn(i) ? mOffset + mValueMask.countOn(i) : 0u; -#else + //return mValueMask.isOn(i) ? 
mOffset + mValueMask.countOn(i) : 0u;// for debugging uint32_t n = i >> 6; const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); if (!(w & mask)) return uint64_t(0); // if i'th value is inactive return offset to background value uint64_t sum = BaseT::mOffset + CountOn(w & (mask - 1u)); -#ifdef NANOVDB_USE_OLD_VALUE_ON_INDEX - if (n--) - sum += BaseT::mCountOn[n] + ((uint32_t(BaseT::mCountOn[7] >> n) & uint32_t(1)) << 8); // exclude first 64 voxels -#else - if (n--) - sum += BaseT::mPrefixSum >> (9u * n) & 511u; -#endif + if (n--) sum += BaseT::mPrefixSum >> (9u * n) & 511u; return sum; -#endif } /// @brief This class cannot be constructed or deleted @@ -5763,8 +5862,6 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } - //__hostdev__ void fill(const ValueType &v) {for (auto *p=mValues, *q=p+512; p!=q; ++p) *p = v;} - /// @brief This class cannot be constructed or deleted LeafData() = delete; LeafData(const LeafData&) = delete; @@ -5830,8 +5927,8 @@ class LeafNode : public LeafData } }; // Member class ValueOnIterator - ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } - ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } /// @brief Visits all inactive values in a leaf node class ValueOffIterator : public MaskIterT @@ -5863,8 +5960,8 @@ class LeafNode : public LeafData } }; // Member class ValueOffIterator - ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } - ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } + __hostdev__ ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } + __hostdev__ ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } /// @brief Visits all values in a 
leaf node, i.e. both active and inactive values class ValueIterator @@ -5914,8 +6011,8 @@ class LeafNode : public LeafData } }; // Member class ValueIterator - ValueIterator beginValue() const { return ValueIterator(this); } - ValueIterator cbeginValueAll() const { return ValueIterator(this); } + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } static_assert(is_same::Type>::value, "Mismatching BuildType"); static constexpr uint32_t LOG2DIM = Log2Dim; @@ -5994,8 +6091,8 @@ class LeafNode : public LeafData __hostdev__ static uint32_t padding() { return DataType::padding(); } - /// @brief return memory usage in bytes for the class - __hostdev__ uint64_t memUsage() { return DataType::memUsage(); } + /// @brief return memory usage in bytes for the leaf node + __hostdev__ uint64_t memUsage() const { return DataType::memUsage(); } /// @brief This class cannot be constructed or deleted LeafNode() = delete; @@ -6052,11 +6149,7 @@ class LeafNode : public LeafData /// @brief Return the linear offset corresponding to the given coordinate __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk) { -#if 0 - return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); -#else return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK); -#endif } /// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated. 
@@ -7300,7 +7393,7 @@ class GridMetaData { // the RootData follows right after the TreeData return grid.tree().data()->mNodeOffset[3] == sizeof(TreeData); } - __hostdev__ bool isValid() const { return mGridData.mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ bool isValid() const { return mGridData.isValid(); } __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } __hostdev__ bool isLevelSet() const { return mGridData.mGridClass == GridClass::LevelSet; } @@ -7317,6 +7410,7 @@ class GridMetaData __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + __hostdev__ bool isLexicographic() const { return mGridData.mFlags.isMaskOn(GridFlags::IsLexicographic); } __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } @@ -7455,7 +7549,10 @@ class PointAccessor : public DefaultReadAccessor NANOVDB_ASSERT(mData); NANOVDB_ASSERT(grid.gridType() == GridType::PointIndex); NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value)); + (grid.gridClass() == GridClass::PointData && is_same::value) || + (grid.gridClass() == GridClass::PointData && is_same::value) || + (grid.gridClass() == GridClass::PointData && is_same::value) || + (grid.gridClass() == GridClass::PointData && is_same::value)); } /// @brief return true if this access was initialized correctly @@ -7593,12 +7690,10 @@ class ChannelAccessor : public DefaultReadAccessor }; // ChannelAccessor -// the following code block uses std and 
therefore needs to be ignored by CUDA and HIP -#if !defined(__CUDA_ARCH__) && !defined(__HIP__) - #if 0 // This MiniGridHandle class is only included as a stand-alone example. Note that aligned_alloc is a C++17 feature! -// Normally we recommend using GridHandle defined in util/GridHandle.h +// Normally we recommend using GridHandle defined in util/GridHandle.h but this minimal implementation could be an +// alternative when using the IO methods defined below. struct MiniGridHandle { struct BufferType { uint8_t *data; @@ -7614,8 +7709,81 @@ struct MiniGridHandle { const uint8_t* data() const {return buffer.data;} };// MiniGridHandle #endif + namespace io { +/// @brief Define compression codecs +/// +/// @note NONE is the default, ZIP is slow but compact and BLOSC offers a great balance. +/// +/// @throw NanoVDB optionally supports ZIP and BLOSC compression and will throw an exception +/// if its support is required but missing. +enum class Codec : uint16_t { NONE = 0, + ZIP = 1, + BLOSC = 2, + END = 3 }; + +/// @brief Data encoded at the head of each segment of a file or stream. +/// +/// @note A file or stream is composed of one or more segments that each contain +// one or more grids. +struct FileHeader {// 16 bytes + uint64_t magic;// 8 bytes + Version version;// 4 bytes version numbers + uint16_t gridCount;// 2 bytes + Codec codec;// 2 bytes + bool isValid() const {return magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_FILE;} +}; // FileHeader ( 16 bytes = 2 words ) + +// @brief Data encoded for each of the grids associated with a segment.
+// Grid size in memory (uint64_t) | +// Grid size on disk (uint64_t) | +// Grid name hash key (uint64_t) | +// Number of active voxels (uint64_t) | +// Grid type (uint32_t) | +// Grid class (uint32_t) | +// Characters in grid name (uint32_t) | +// AABB in world space (2*3*double) | one per grid in file +// AABB in index space (2*3*int) | +// Size of a voxel in world units (3*double) | +// Byte size of the grid name (uint32_t) | +// Number of nodes per level (4*uint32_t) | +// Number of active tiles per level (3*uint32_t) | +// Codec for file compression (uint16_t) | +// Padding due to 8B alignment (uint16_t) | +// Version number (uint32_t) | +struct FileMetaData +{// 176 bytes + uint64_t gridSize, fileSize, nameKey, voxelCount; // 4 * 8 = 32B. + GridType gridType; // 4B. + GridClass gridClass; // 4B. + BBox worldBBox; // 2 * 3 * 8 = 48B. + CoordBBox indexBBox; // 2 * 3 * 4 = 24B. + Vec3d voxelSize; // 24B. + uint32_t nameSize; // 4B. + uint32_t nodeCount[4]; //4 x 4 = 16B + uint32_t tileCount[3];// 3 x 4 = 12B + Codec codec; // 2B + uint16_t padding;// 2B, due to 8B alignment from uint64_t + Version version;// 4B +}; // FileMetaData + +// the following code block uses std and therefore needs to be ignored by CUDA and HIP +#if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +inline const char* toStr(Codec codec) +{ + static const char * LUT[] = { "NONE", "ZIP", "BLOSC" , "END" }; + static_assert(sizeof(LUT) / sizeof(char*) - 1 == int(Codec::END), "Unexpected size of LUT"); + return LUT[static_cast(codec)]; +} + +// Note that starting with version 32.6.0 it is possible to write and read raw grid buffers to +// files, e.g. os.write((const char*)&buffer.data(), buffer.size()) or more conveniently as +// handle.write(fileName). In addition to this simple approach we offer the methods below to +// write traditional uncompressed nanovdb files that unlike raw files include metadata that +// is used for tools like nanovdb_print.
+ /// /// @brief This is a standalone alternative to io::writeGrid(...,Codec::NONE) defined in util/IO.h /// Unlike the latter this function has no dependencies at all, not even NanoVDB.h, so it also @@ -7631,69 +7799,43 @@ namespace io { /// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid. /// /// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :) -template // StreamT class must support: "void write(char*, size_t)" -void writeUncompressedGrid(StreamT& os, const void* buffer) -{ - char header[192] = {0}, *dst = header; // combines io::Header + io::MetaData, see util/IO.h - const char *grid = (const char*)buffer, *tree = grid + 672, *root = tree + *(const uint64_t*)(tree + 24); - auto cpy = [&](const char* src, int n) {for (auto *end=src+n; src!=end; ++src) *dst++ = *src; }; - if (*(const uint64_t*)(grid) != 0x304244566f6e614eUL) { - fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid magic number\n"); - exit(EXIT_FAILURE); - } else if (*(const uint32_t*)(grid + 16) >> 21 != 32) { - fprintf(stderr, "nanovdb::writeUncompressedGrid: invalid major version\n"); - exit(EXIT_FAILURE); - } - cpy(grid, 8); // uint64_t Header::magic - cpy(grid + 16, 4); // uint32_t Heder::version - *(uint16_t*)(dst) = 1; - dst += 4; // uint16_t Header::gridCount=1 and uint16_t Header::codec=0 - cpy(grid + 32, 8); // uint64_t MetaData::gridSize - cpy(grid + 32, 8); // uint64_t MetaData::fileSize - dst += 8; // uint64_t MetaData::nameKey - cpy(tree + 56, 8); // uint64_t MetaData::voxelCount - cpy(grid + 636, 4); // uint32_t MetaData::gridType - cpy(grid + 632, 4); // uint32_t MetaData::gridClass - cpy(grid + 560, 48); // double[6] MetaData::worldBBox - cpy(root, 24); // int[6] MetaData::indexBBox - cpy(grid + 608, 24); // double[3] MetaData::voxelSize - const char* gridName = grid + 40; // shortGridName - if (*(const uint32_t*)(grid + 20) & uint32_t(1)) { // has long grid name - gridName = 
grid + *(const int64_t*)(grid + 640) + 288 * (*(const uint32_t*)(grid + 648) - 1); - gridName += *(const uint64_t*)gridName; // long grid name encoded in blind meta data - } - uint32_t nameSize = 1; // '\0' - for (const char* p = gridName; *p != '\0'; ++p) - ++nameSize; - *(uint32_t*)(dst) = nameSize; - dst += 4; // uint32_t MetaData::nameSize - cpy(tree + 32, 12); // uint32_t[3] MetaData::nodeCount - *(uint32_t*)(dst) = 1; - dst += 4; // uint32_t MetaData::nodeCount[3]=1 - cpy(tree + 44, 12); // uint32_t[3] MetaData::tileCount - dst += 4; // uint16_t codec and padding - cpy(grid + 16, 4); // uint32_t MetaData::version - assert(dst - header == 192); - os.write(header, 192); // write header - os.write(gridName, nameSize); // write grid name - while (1) { // loop over all grids in the buffer (typically just one grid per buffer) - const uint64_t gridSize = *(const uint64_t*)(grid + 32); - os.write(grid, gridSize); // write grid <- bulk of writing! - if (*(const uint32_t*)(grid + 24) >= *(const uint32_t*)(grid + 28) - 1) - break; - grid += gridSize; - } -} // writeUncompressedGrid +template // StreamT class must support: "void write(const char*, size_t)" +void writeUncompressedGrid(StreamT& os, const GridData* gridData, bool raw = false) +{ + NANOVDB_ASSERT(gridData->mMagic == NANOVDB_MAGIC_NUMBER || gridData->mMagic == NANOVDB_MAGIC_GRID); + NANOVDB_ASSERT(gridData->mVersion.isCompatible()); + if (!raw) {// segment with a single grid: FileHeader, FileMetaData, gridName, Grid +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + FileHeader head{NANOVDB_MAGIC_FILE, gridData->mVersion, 1u, Codec::NONE}; +#else + FileHeader head{NANOVDB_MAGIC_NUMBER, gridData->mVersion, 1u, Codec::NONE}; +#endif + const char* gridName = gridData->gridName(); + uint32_t nameSize = 1; // '\0' + for (const char* p = gridName; *p != '\0'; ++p) ++nameSize; + const TreeData* treeData = (const TreeData*)gridData->treePtr(); + FileMetaData meta{gridData->mGridSize, gridData->mGridSize, 0u, 
treeData->mVoxelCount, + gridData->mGridType, gridData->mGridClass, gridData->mWorldBBox, + treeData->bbox(), gridData->mVoxelSize, nameSize, + {treeData->mNodeCount[0], treeData->mNodeCount[1], treeData->mNodeCount[2], 1u}, + {treeData->mTileCount[0], treeData->mTileCount[1], treeData->mTileCount[2]}, + Codec::NONE, 0u, gridData->mVersion }; // FileMetaData + os.write((const char*)&head, sizeof(FileHeader)); // write header + os.write((const char*)&meta, sizeof(FileMetaData)); // write meta data + os.write(gridName, nameSize); // write grid name + } + os.write((const char*)gridData, gridData->mGridSize);// write the grid +}// writeUncompressedGrid /// @brief write multiple NanoVDB grids to a single file, without compression. +/// @note To write all grids in a single GridHandle simply use handle.write("fileName") template class VecT> -void writeUncompressedGrids(const char* fileName, const VecT& handles) +void writeUncompressedGrids(const char* fileName, const VecT& handles, bool raw = false) { #ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ofstream or FILE implementations std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); #else - struct StreamT - { + struct StreamT { FILE* fptr; StreamT(const char* name) { fptr = fopen(name, "wb"); } ~StreamT() { fclose(fptr); } @@ -7705,8 +7847,9 @@ void writeUncompressedGrids(const char* fileName, const VecT& handl fprintf(stderr, "nanovdb::writeUncompressedGrids: Unable to open file \"%s\"for output\n", fileName); exit(EXIT_FAILURE); } - for (auto& handle : handles) - writeUncompressedGrid(os, handle.data()); + for (auto& h : handles) { + for (uint32_t n=0; n& handl /// @details StreamT class must support: "bool read(char*, size_t)" and "void skip(uint32_t)" template class VecT> VecT readUncompressedGrids(StreamT& is, const typename GridHandleT::BufferType& pool = typename GridHandleT::BufferType()) -{ // header1, metadata11, grid11, metadata12, grid2 ...
header2, metadata21, grid21, metadata22, grid22 ... - char header[16], metadata[176]; +{ VecT handles; - while (is.read(header, 16)) { // read all segments, e.g. header1, metadata11, grid11, metadata12, grid2 ... - if (*(uint64_t*)(header) != 0x304244566f6e614eUL) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number\n"); - exit(EXIT_FAILURE); - } else if (*(uint32_t*)(header + 8) >> 21 != 32) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version\n"); - exit(EXIT_FAILURE); - } else if (*(uint16_t*)(header + 14) != 0) { - fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec\n"); - exit(EXIT_FAILURE); + GridData data; + is.read((char*)&data, 40);// we only need to load the first 40 bytes + if (data.mMagic == NANOVDB_MAGIC_GRID || data.isValid()) {// stream contains a raw grid buffer + uint64_t size = data.mGridSize, sum = 0u; + while(data.mGridIndex + 1u < data.mGridCount) { + is.skip(data.mGridSize - 40);// skip grid + is.read((char*)&data, 40);// read 40 bytes + sum += data.mGridSize; } - for (uint16_t i = 0, e = *(uint16_t*)(header + 12); i < e; ++i) { // read all grids in segment - if (!is.read(metadata, 176)) { - fprintf(stderr, "nanovdb::readUncompressedGrids: error reading metadata\n"); + is.skip(-int64_t(sum + 40));// rewind to start + auto buffer = GridHandleT::BufferType::create(size + sum, &pool); + is.read((char*)(buffer.data()), buffer.size()); + handles.emplace_back(std::move(buffer)); + } else {// Header0, MetaData0, gridName0, Grid0...HeaderN, MetaDataN, gridNameN, GridN + is.skip(-40);// rewind + FileHeader head; + while(is.read((char*)&head, sizeof(FileHeader))) { + if (!head.isValid()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number = \"%s\"\n", (const char*)&(head.magic)); + exit(EXIT_FAILURE); + } else if (!head.version.isCompatible()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version = \"%s\"\n", head.version.c_str()); + exit(EXIT_FAILURE); + } 
else if (head.codec != Codec::NONE) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec = \"%s\"\n", toStr(head.codec)); exit(EXIT_FAILURE); } - const uint64_t gridSize = *(uint64_t*)(metadata); - auto buffer = GridHandleT::BufferType::create(gridSize, &pool); - is.skip(*(uint32_t*)(metadata + 136)); // skip grid name - is.read((char*)buffer.data(), gridSize); - handles.emplace_back(std::move(buffer)); - } + FileMetaData meta; + for (uint16_t i = 0; i < head.gridCount; ++i) { // read all grids in segment + is.read((char*)&meta, sizeof(FileMetaData));// read meta data + is.skip(meta.nameSize); // skip grid name + auto buffer = GridHandleT::BufferType::create(meta.gridSize, &pool); + is.read((char*)buffer.data(), meta.gridSize);// read grid + handles.emplace_back(std::move(buffer)); + }// loop over grids in segment + }// loop over segments } return handles; } // readUncompressedGrids @@ -7750,26 +7906,20 @@ template class VecT> VecT readUncompressedGrids(const char* fileName, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) { #ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ifstream or FILE implementations - struct StreamT : public std::ifstream - { - StreamT(const char* name) - : std::ifstream(name, std::ios::in | std::ios::binary) - { - } - void skip(uint32_t off) { this->seekg(off, std::ios_base::cur); } + struct StreamT : public std::ifstream { + StreamT(const char* name) : std::ifstream(name, std::ios::in | std::ios::binary){} + void skip(int64_t off) { this->seekg(off, std::ios_base::cur); } }; #else - struct StreamT - { + struct StreamT { FILE* fptr; StreamT(const char* name) { fptr = fopen(name, "rb"); } ~StreamT() { fclose(fptr); } - bool read(char* data, size_t n) - { + bool read(char* data, size_t n) { size_t m = fread(data, 1, n, fptr); return n == m; } - void skip(uint32_t off) { fseek(fptr, off, SEEK_CUR); } + void skip(int64_t off) { fseek(fptr, (long int)off, SEEK_CUR); } bool is_open() 
const { return fptr != NULL; } }; #endif @@ -7781,10 +7931,10 @@ VecT readUncompressedGrids(const char* fileName, const typename Gri return readUncompressedGrids(is, buffer); } // readUncompressedGrids -} // namespace io - #endif // if !defined(__CUDA_ARCH__) && !defined(__HIP__) +} // namespace io + // ----------------------------> Implementations of random access methods <-------------------------------------- /// @brief Implements Tree::getValue(Coord), i.e. return the value associated with a specific coordinate @c ijk. diff --git a/nanovdb/nanovdb/PNanoVDB.h b/nanovdb/nanovdb/PNanoVDB.h index f32b7e0ac1..24fb68478c 100644 --- a/nanovdb/nanovdb/PNanoVDB.h +++ b/nanovdb/nanovdb/PNanoVDB.h @@ -1,3383 +1,3384 @@ - -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/*! - \file PNanoVDB.h - - \author Andrew Reidmeyer - - \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port - of NanoVDB.h, which is compatible with most graphics APIs. -*/ - -#ifndef NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED -#define NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED - -// ------------------------------------------------ Configuration ----------------------------------------------------------- - -// platforms -//#define PNANOVDB_C -//#define PNANOVDB_HLSL -//#define PNANOVDB_GLSL - -// addressing mode -// PNANOVDB_ADDRESS_32 -// PNANOVDB_ADDRESS_64 -#if defined(PNANOVDB_C) -#ifndef PNANOVDB_ADDRESS_32 -#define PNANOVDB_ADDRESS_64 -#endif -#elif defined(PNANOVDB_HLSL) -#ifndef PNANOVDB_ADDRESS_64 -#define PNANOVDB_ADDRESS_32 -#endif -#elif defined(PNANOVDB_GLSL) -#ifndef PNANOVDB_ADDRESS_64 -#define PNANOVDB_ADDRESS_32 -#endif -#endif - -// bounds checking -//#define PNANOVDB_BUF_BOUNDS_CHECK - -// enable HDDA by default on HLSL/GLSL, make explicit on C -#if defined(PNANOVDB_C) -//#define PNANOVDB_HDDA -#ifdef PNANOVDB_HDDA -#ifndef PNANOVDB_CMATH -#define PNANOVDB_CMATH -#endif -#endif -#elif defined(PNANOVDB_HLSL) -#define PNANOVDB_HDDA -#elif 
defined(PNANOVDB_GLSL) -#define PNANOVDB_HDDA -#endif - -#ifdef PNANOVDB_CMATH -#ifndef __CUDACC_RTC__ -#include -#endif -#endif - -// ------------------------------------------------ Buffer ----------------------------------------------------------- - -#if defined(PNANOVDB_BUF_CUSTOM) -// NOP -#elif defined(PNANOVDB_C) -#define PNANOVDB_BUF_C -#elif defined(PNANOVDB_HLSL) -#define PNANOVDB_BUF_HLSL -#elif defined(PNANOVDB_GLSL) -#define PNANOVDB_BUF_GLSL -#endif - -#if defined(PNANOVDB_BUF_C) -#ifndef __CUDACC_RTC__ -#include -#endif -#if defined(__CUDACC__) -#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__ -#elif defined(_WIN32) -#define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline -#else -#define PNANOVDB_BUF_FORCE_INLINE static inline __attribute__((always_inline)) -#endif -typedef struct pnanovdb_buf_t -{ - uint32_t* data; -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words; -#endif -}pnanovdb_buf_t; -PNANOVDB_BUF_FORCE_INLINE pnanovdb_buf_t pnanovdb_make_buf(uint32_t* data, uint64_t size_in_words) -{ - pnanovdb_buf_t ret; - ret.data = data; -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - ret.size_in_words = size_in_words; -#endif - return ret; -} -#if defined(PNANOVDB_ADDRESS_32) -PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint32_t byte_offset) -{ - uint32_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; -#else - return buf.data[wordaddress]; -#endif -} -PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint32_t byte_offset) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint32_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - return wordaddress64 < size_in_words64 ? 
data64[wordaddress64] : 0llu; -#else - return data64[wordaddress64]; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) -{ - uint32_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - if (wordaddress < buf.size_in_words) - { - buf.data[wordaddress] = value; -} -#else - buf.data[wordaddress] = value; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint32_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - if (wordaddress64 < size_in_words64) - { - data64[wordaddress64] = value; - } -#else - data64[wordaddress64] = value; -#endif -} -#elif defined(PNANOVDB_ADDRESS_64) -PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) -{ - uint64_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; -#else - return buf.data[wordaddress]; -#endif -} -PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint64_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - return wordaddress64 < size_in_words64 ? 
data64[wordaddress64] : 0llu; -#else - return data64[wordaddress64]; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) -{ - uint64_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - if (wordaddress < buf.size_in_words) - { - buf.data[wordaddress] = value; - } -#else - buf.data[wordaddress] = value; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint64_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - if (wordaddress64 < size_in_words64) - { - data64[wordaddress64] = value; - } -#else - data64[wordaddress64] = value; -#endif -} -#endif -typedef uint32_t pnanovdb_grid_type_t; -#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn -#elif defined(PNANOVDB_BUF_HLSL) -#if defined(PNANOVDB_ADDRESS_32) -#define pnanovdb_buf_t StructuredBuffer -uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) -{ - return buf[(byte_offset >> 2u)]; -} -uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) -{ - uint2 ret; - ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); - ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); - return ret; -} -void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) -{ - // NOP, by default no write in HLSL -} -void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value) -{ - // NOP, by default no write in HLSL -} -#elif defined(PNANOVDB_ADDRESS_64) -#define pnanovdb_buf_t StructuredBuffer -uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) -{ - return buf[uint(byte_offset >> 2u)]; -} -uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) -{ - uint64_t ret; - ret = 
pnanovdb_buf_read_uint32(buf, byte_offset + 0u); - ret = ret + (uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u); - return ret; -} -void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint value) -{ - // NOP, by default no write in HLSL -} -void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) -{ - // NOP, by default no write in HLSL -} -#endif -#define pnanovdb_grid_type_t uint -#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn -#elif defined(PNANOVDB_BUF_GLSL) -struct pnanovdb_buf_t -{ - uint unused; // to satisfy min struct size? -}; -uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) -{ - return pnanovdb_buf_data[(byte_offset >> 2u)]; -} -uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) -{ - uvec2 ret; - ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); - ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); - return ret; -} -void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) -{ - // NOP, by default no write in HLSL -} -void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) -{ - // NOP, by default no write in HLSL -} -#define pnanovdb_grid_type_t uint -#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn -#endif - -// ------------------------------------------------ Basic Types ----------------------------------------------------------- - -// force inline -#if defined(PNANOVDB_C) -#if defined(__CUDACC__) -#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ -#elif defined(_WIN32) -#define PNANOVDB_FORCE_INLINE static inline __forceinline -#else -#define PNANOVDB_FORCE_INLINE static inline __attribute__((always_inline)) -#endif -#elif defined(PNANOVDB_HLSL) -#define PNANOVDB_FORCE_INLINE -#elif defined(PNANOVDB_GLSL) -#define PNANOVDB_FORCE_INLINE -#endif - -// struct 
typedef, static const, inout -#if defined(PNANOVDB_C) -#define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; -#define PNANOVDB_STATIC_CONST static const -#define PNANOVDB_INOUT(X) X* -#define PNANOVDB_IN(X) const X* -#define PNANOVDB_DEREF(X) (*X) -#define PNANOVDB_REF(X) &X -#elif defined(PNANOVDB_HLSL) -#define PNANOVDB_STRUCT_TYPEDEF(X) -#define PNANOVDB_STATIC_CONST static const -#define PNANOVDB_INOUT(X) inout X -#define PNANOVDB_IN(X) X -#define PNANOVDB_DEREF(X) X -#define PNANOVDB_REF(X) X -#elif defined(PNANOVDB_GLSL) -#define PNANOVDB_STRUCT_TYPEDEF(X) -#define PNANOVDB_STATIC_CONST const -#define PNANOVDB_INOUT(X) inout X -#define PNANOVDB_IN(X) X -#define PNANOVDB_DEREF(X) X -#define PNANOVDB_REF(X) X -#endif - -// basic types, type conversion -#if defined(PNANOVDB_C) -#define PNANOVDB_NATIVE_64 -#ifndef __CUDACC_RTC__ -#include -#endif -#if !defined(PNANOVDB_MEMCPY_CUSTOM) -#ifndef __CUDACC_RTC__ -#include -#endif -#define pnanovdb_memcpy memcpy -#endif -typedef uint32_t pnanovdb_uint32_t; -typedef int32_t pnanovdb_int32_t; -typedef int32_t pnanovdb_bool_t; -#define PNANOVDB_FALSE 0 -#define PNANOVDB_TRUE 1 -typedef uint64_t pnanovdb_uint64_t; -typedef int64_t pnanovdb_int64_t; -typedef struct pnanovdb_coord_t -{ - pnanovdb_int32_t x, y, z; -}pnanovdb_coord_t; -typedef struct pnanovdb_vec3_t -{ - float x, y, z; -}pnanovdb_vec3_t; -PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return (pnanovdb_int32_t)v; } -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return (pnanovdb_int64_t)v; } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return (pnanovdb_uint32_t)v; } -PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } 
-PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return *((pnanovdb_uint32_t*)(&v)); } -PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return *((pnanovdb_uint64_t*)(&v)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return ((pnanovdb_uint64_t)x); } -PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } -PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } -#ifdef PNANOVDB_CMATH -PNANOVDB_FORCE_INLINE float pnanovdb_floor(float v) { return floorf(v); } -#endif -PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return (pnanovdb_int32_t)v; } -PNANOVDB_FORCE_INLINE float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return (float)v; } -PNANOVDB_FORCE_INLINE float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return (float)v; } -PNANOVDB_FORCE_INLINE float pnanovdb_min(float a, float b) { return a < b ? a : b; } -PNANOVDB_FORCE_INLINE float pnanovdb_max(float a, float b) { return a > b ? 
a : b; } -#elif defined(PNANOVDB_HLSL) -typedef uint pnanovdb_uint32_t; -typedef int pnanovdb_int32_t; -typedef bool pnanovdb_bool_t; -#define PNANOVDB_FALSE false -#define PNANOVDB_TRUE true -typedef int3 pnanovdb_coord_t; -typedef float3 pnanovdb_vec3_t; -pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } -pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } -float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } -pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } -float pnanovdb_floor(float v) { return floor(v); } -pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } -float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } -float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } -float pnanovdb_min(float a, float b) { return min(a, b); } -float pnanovdb_max(float a, float b) { return max(a, b); } -#if defined(PNANOVDB_ADDRESS_32) -typedef uint2 pnanovdb_uint64_t; -typedef int2 pnanovdb_int64_t; -pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } -pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } -double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } -pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } -pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } -pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint2(x, 0); } -bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } -bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } -#else -typedef 
uint64_t pnanovdb_uint64_t; -typedef int64_t pnanovdb_int64_t; -pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } -pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } -double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } -pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } -pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } -pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint64_t(x); } -bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } -bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } -#endif -#elif defined(PNANOVDB_GLSL) -#define pnanovdb_uint32_t uint -#define pnanovdb_int32_t int -#define pnanovdb_bool_t bool -#define PNANOVDB_FALSE false -#define PNANOVDB_TRUE true -#define pnanovdb_uint64_t uvec2 -#define pnanovdb_int64_t ivec2 -#define pnanovdb_coord_t ivec3 -#define pnanovdb_vec3_t vec3 -pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } -pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v); } -pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } -pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } -float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } -pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } -double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } -pnanovdb_uint64_t 
pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } -pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } -pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } -pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uvec2(x, 0); } -bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } -bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } -float pnanovdb_floor(float v) { return floor(v); } -pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } -float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } -float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } -float pnanovdb_min(float a, float b) { return min(a, b); } -float pnanovdb_max(float a, float b) { return max(a, b); } -#endif - -// ------------------------------------------------ Coord/Vec3 Utilties ----------------------------------------------------------- - -#if defined(PNANOVDB_C) -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) -{ - pnanovdb_vec3_t v; - v.x = a; - v.y = a; - v.z = a; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_add(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x + b.x; - v.y = a.y + b.y; - v.z = a.z + b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_sub(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x - b.x; - v.y = a.y - b.y; - v.z = a.z - b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_mul(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x * b.x; - v.y = a.y * b.y; - v.z = a.z * b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_div(const 
pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x / b.x; - v.y = a.y / b.y; - v.z = a.z / b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_min(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x < b.x ? a.x : b.x; - v.y = a.y < b.y ? a.y : b.y; - v.z = a.z < b.z ? a.z : b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_max(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) -{ - pnanovdb_vec3_t v; - v.x = a.x > b.x ? a.x : b.x; - v.y = a.y > b.y ? a.y : b.y; - v.z = a.z > b.z ? a.z : b.z; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) -{ - pnanovdb_vec3_t v; - v.x = pnanovdb_int32_to_float(coord.x); - v.y = pnanovdb_int32_to_float(coord.y); - v.z = pnanovdb_int32_to_float(coord.z); - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_uniform(const pnanovdb_int32_t a) -{ - pnanovdb_coord_t v; - v.x = a; - v.y = a; - v.z = a; - return v; -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) -{ - pnanovdb_coord_t v; - v.x = a.x + b.x; - v.y = a.y + b.y; - v.z = a.z + b.z; - return v; -} -#elif defined(PNANOVDB_HLSL) -pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return float3(a, a, a); } -pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } -pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } -pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } -pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } -pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } -pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } -pnanovdb_vec3_t pnanovdb_coord_to_vec3(pnanovdb_coord_t coord) { return float3(coord); } 
-pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return int3(a, a, a); } -pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } -#elif defined(PNANOVDB_GLSL) -pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return vec3(a, a, a); } -pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } -pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } -pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } -pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } -pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } -pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } -pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) { return vec3(coord); } -pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, a); } -pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } -#endif - -// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) -{ -#if defined(PNANOVDB_C) -#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) - return __popcnt(value); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) - return __builtin_popcount(value); -#else - value = value - ((value >> 1) & 0x55555555); - value = (value & 0x33333333) + ((value >> 2) & 0x33333333); - value = (value + (value >> 4)) & 0x0F0F0F0F; - return (value * 0x01010101) >> 24; -#endif -#elif defined(PNANOVDB_HLSL) - return countbits(value); -#elif defined(PNANOVDB_GLSL) - return bitCount(value); -#endif -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t 
pnanovdb_uint64_countbits(pnanovdb_uint64_t value) -{ - return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); -} - -#if defined(PNANOVDB_ADDRESS_32) -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) -{ - pnanovdb_uint32_t low = pnanovdb_uint64_low(a); - pnanovdb_uint32_t high = pnanovdb_uint64_high(a); - low += b; - if (low < b) - { - high += 1u; - } - return pnanovdb_uint32_as_uint64(low, high); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) -{ - pnanovdb_uint32_t low = pnanovdb_uint64_low(a); - pnanovdb_uint32_t high = pnanovdb_uint64_high(a); - if (low == 0u) - { - high -= 1u; - } - low -= 1u; - return pnanovdb_uint32_as_uint64(low, high); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) -{ - pnanovdb_uint32_t low = pnanovdb_uint64_low(a); - pnanovdb_uint32_t high = pnanovdb_uint64_high(a); - return (b >= 32u) ? - (high >> (b - 32)) : - ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) -{ - pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; - pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 
1u << (bit_idx - 32u) : 0u; - return pnanovdb_uint32_as_uint64(mask_low, mask_high); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) -{ - return pnanovdb_uint32_as_uint64( - pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), - pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) - ); -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) -{ - return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; -} - -#else -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) -{ - return a + b; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) -{ - return a - 1u; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) -{ - return pnanovdb_uint64_low(a >> b); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) -{ - return 1llu << bit_idx; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) -{ - return a & b; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) -{ - return a != 0llu; -} -#endif - -// ------------------------------------------------ Address Type ----------------------------------------------------------- - -#if defined(PNANOVDB_ADDRESS_32) -struct pnanovdb_address_t -{ - pnanovdb_uint32_t byte_offset; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += byte_offset; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) -{ - pnanovdb_address_t ret = address; - ret.byte_offset -= byte_offset; - return ret; -} 
-PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += byte_offset * multiplier; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) -{ - pnanovdb_address_t ret = address; - // lose high bits on 32-bit - ret.byte_offset += pnanovdb_uint64_low(byte_offset); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) -{ - return address.byte_offset & mask; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) -{ - pnanovdb_address_t ret = address; - ret.byte_offset &= (~mask); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() -{ - pnanovdb_address_t ret = { 0 }; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) -{ - return address.byte_offset == 0u; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) -{ - return address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; -} -#elif defined(PNANOVDB_ADDRESS_64) -struct pnanovdb_address_t -{ - pnanovdb_uint64_t byte_offset; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) -{ - 
pnanovdb_address_t ret = address; - ret.byte_offset += byte_offset; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) -{ - pnanovdb_address_t ret = address; - ret.byte_offset -= byte_offset; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += pnanovdb_uint32_as_uint64_low(byte_offset) * pnanovdb_uint32_as_uint64_low(multiplier); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += byte_offset; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) -{ - pnanovdb_address_t ret = address; - ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) -{ - return pnanovdb_uint64_low(address.byte_offset) & mask; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) -{ - pnanovdb_address_t ret = address; - ret.byte_offset &= (~pnanovdb_uint32_as_uint64_low(mask)); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() -{ - pnanovdb_address_t ret = { 0 }; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) -{ - return address.byte_offset == 0llu; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) -{ - return 
address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; -} -#endif - -// ------------------------------------------------ High Level Buffer Read ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_buf_read_uint32(buf, address.byte_offset); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_read_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_buf_read_uint64(buf, address.byte_offset); -} -PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_read_int32(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, address)); -} -PNANOVDB_FORCE_INLINE float pnanovdb_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, address)); -} -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_read_int64(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_uint64_as_int64(pnanovdb_read_uint64(buf, address)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_read_double(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - return pnanovdb_uint64_as_double(pnanovdb_read_uint64(buf, address)); -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_coord_t ret; - ret.x = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 0u))); - ret.y = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 4u))); - ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_vec3_t ret; - ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); - ret.y = 
pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); - ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); - return (raw >> (pnanovdb_address_mask(address, 2) << 3)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); - return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_vec3_t ret; - const float scale = 1.f / 65535.f; - ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; - ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; - ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; - return ret; -} -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_vec3_t ret; - const float scale = 1.f / 255.f; - ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; - ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; - ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_read_bit(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t bit_offset) -{ - pnanovdb_address_t word_address = pnanovdb_address_mask_inv(address, 3u); - 
pnanovdb_uint32_t bit_index = (pnanovdb_address_mask(address, 3u) << 3u) + bit_offset; - pnanovdb_uint32_t value_word = pnanovdb_buf_read_uint32(buf, word_address.byte_offset); - return ((value_word >> bit_index) & 1) != 0u; -} - -#if defined(PNANOVDB_C) -PNANOVDB_FORCE_INLINE short pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); - return (short)(raw >> (pnanovdb_address_mask(address, 2) << 3)); -} -#elif defined(PNANOVDB_HLSL) -PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); - return f16tof32(raw >> (pnanovdb_address_mask(address, 2) << 3)); -} -#elif defined(PNANOVDB_GLSL) -PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) -{ - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); - return unpackHalf2x16(raw >> (pnanovdb_address_mask(address, 2) << 3)).x; -} -#endif - -// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) -{ - pnanovdb_buf_write_uint32(buf, address.byte_offset, value); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) -{ - pnanovdb_buf_write_uint64(buf, address.byte_offset, value); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) -{ - pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) -{ - pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); -} -PNANOVDB_FORCE_INLINE void 
pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) -{ - pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) -{ - pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) -{ - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) -{ - pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), PNANOVDB_DEREF(value).x); - pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); - pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); -} - -// ------------------------------------------------ Core Structures ----------------------------------------------------------- - -#define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) - -#define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI -#define PNANOVDB_MINOR_VERSION_NUMBER 5// reflects changes to the API but not ABI -#define PNANOVDB_PATCH_VERSION_NUMBER 1// reflects bug-fixes with no ABI or API changes - -#define PNANOVDB_GRID_TYPE_UNKNOWN 0 -#define PNANOVDB_GRID_TYPE_FLOAT 1 -#define PNANOVDB_GRID_TYPE_DOUBLE 2 -#define PNANOVDB_GRID_TYPE_INT16 3 -#define PNANOVDB_GRID_TYPE_INT32 4 -#define PNANOVDB_GRID_TYPE_INT64 5 -#define PNANOVDB_GRID_TYPE_VEC3F 6 
-#define PNANOVDB_GRID_TYPE_VEC3D 7 -#define PNANOVDB_GRID_TYPE_MASK 8 -#define PNANOVDB_GRID_TYPE_HALF 9 -#define PNANOVDB_GRID_TYPE_UINT32 10 -#define PNANOVDB_GRID_TYPE_BOOLEAN 11 -#define PNANOVDB_GRID_TYPE_RGBA8 12 -#define PNANOVDB_GRID_TYPE_FP4 13 -#define PNANOVDB_GRID_TYPE_FP8 14 -#define PNANOVDB_GRID_TYPE_FP16 15 -#define PNANOVDB_GRID_TYPE_FPN 16 -#define PNANOVDB_GRID_TYPE_VEC4F 17 -#define PNANOVDB_GRID_TYPE_VEC4D 18 -#define PNANOVDB_GRID_TYPE_INDEX 19 -#define PNANOVDB_GRID_TYPE_ONINDEX 20 -#define PNANOVDB_GRID_TYPE_INDEXMASK 21 -#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 -#define PNANOVDB_GRID_TYPE_POINTINDEX 23 -#define PNANOVDB_GRID_TYPE_VEC3U8 24 -#define PNANOVDB_GRID_TYPE_VEC3U16 25 -#define PNANOVDB_GRID_TYPE_END 26 - -#define PNANOVDB_GRID_CLASS_UNKNOWN 0 -#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF -#define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density -#define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity -#define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid -#define PNANOVDB_GRID_CLASS_POINT_DATA 5 // point data grid -#define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) -#define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft -#define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. 
into an external array -#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features -#define PNANOVDB_GRID_CLASS_END 10 - -#define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) -#define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) -#define PNANOVDB_GRID_FLAGS_HAS_MIN_MAX (1 << 2) -#define PNANOVDB_GRID_FLAGS_HAS_AVERAGE (1 << 3) -#define PNANOVDB_GRID_FLAGS_HAS_STD_DEVIATION (1 << 4) -#define PNANOVDB_GRID_FLAGS_IS_BREADTH_FIRST (1 << 5) -#define PNANOVDB_GRID_FLAGS_END (1 << 6) - -#define PNANOVDB_LEAF_TYPE_DEFAULT 0 -#define PNANOVDB_LEAF_TYPE_LITE 1 -#define PNANOVDB_LEAF_TYPE_FP 2 -#define PNANOVDB_LEAF_TYPE_INDEX 3 -#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 -#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 - -// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... -// bit count of values in leaf nodes, i.e. 8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48 }; -// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64 }; -// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48 }; -// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. 
for Vec3f it is 32 -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16 }; -// bit alignment of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32 }; -// one of the 4 leaf types defined above, e.g. PNANOVDB_LEAF_TYPE_INDEX = 3 -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0 }; - -struct pnanovdb_map_t -{ - float matf[9]; - float invmatf[9]; - float vecf[3]; - float taperf; - double matd[9]; - double invmatd[9]; - double vecd[3]; - double taperd; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_t) -struct pnanovdb_map_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_handle_t) - -#define PNANOVDB_MAP_SIZE 264 - -#define PNANOVDB_MAP_OFF_MATF 0 -#define PNANOVDB_MAP_OFF_INVMATF 36 -#define PNANOVDB_MAP_OFF_VECF 72 -#define PNANOVDB_MAP_OFF_TAPERF 84 -#define PNANOVDB_MAP_OFF_MATD 88 -#define PNANOVDB_MAP_OFF_INVMATD 160 -#define PNANOVDB_MAP_OFF_VECD 232 -#define PNANOVDB_MAP_OFF_TAPERD 256 - -PNANOVDB_FORCE_INLINE float pnanovdb_map_get_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index)); -} -PNANOVDB_FORCE_INLINE float pnanovdb_map_get_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index)); -} -PNANOVDB_FORCE_INLINE float pnanovdb_map_get_vecf(pnanovdb_buf_t buf, 
pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index)); -} -PNANOVDB_FORCE_INLINE float pnanovdb_map_get_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_map_get_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_map_get_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_map_get_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { - pnanovdb_write_float(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); -} - -struct pnanovdb_grid_t -{ - pnanovdb_uint64_t magic; // 8 bytes, 0 - pnanovdb_uint64_t checksum; // 8 bytes, 8 - pnanovdb_uint32_t version; // 4 bytes, 16 - pnanovdb_uint32_t flags; // 4 bytes, 20 - pnanovdb_uint32_t grid_index; // 4 bytes, 24 - pnanovdb_uint32_t grid_count; // 4 bytes, 28 - pnanovdb_uint64_t grid_size; // 8 bytes, 32 - pnanovdb_uint32_t grid_name[256 / 4]; // 256 bytes, 40 - pnanovdb_map_t map; // 264 bytes, 296 - double world_bbox[6]; // 48 bytes, 560 - double voxel_size[3]; // 24 bytes, 608 - pnanovdb_uint32_t grid_class; // 4 bytes, 632 - pnanovdb_uint32_t grid_type; // 4 bytes, 636 - pnanovdb_int64_t blind_metadata_offset; // 8 bytes, 640 - 
pnanovdb_uint32_t blind_metadata_count; // 4 bytes, 648 - pnanovdb_uint32_t pad[5]; // 20 bytes, 652 -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_t) -struct pnanovdb_grid_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_handle_t) - -#define PNANOVDB_GRID_SIZE 672 - -#define PNANOVDB_GRID_OFF_MAGIC 0 -#define PNANOVDB_GRID_OFF_CHECKSUM 8 -#define PNANOVDB_GRID_OFF_VERSION 16 -#define PNANOVDB_GRID_OFF_FLAGS 20 -#define PNANOVDB_GRID_OFF_GRID_INDEX 24 -#define PNANOVDB_GRID_OFF_GRID_COUNT 28 -#define PNANOVDB_GRID_OFF_GRID_SIZE 32 -#define PNANOVDB_GRID_OFF_GRID_NAME 40 -#define PNANOVDB_GRID_OFF_MAP 296 -#define PNANOVDB_GRID_OFF_WORLD_BBOX 560 -#define PNANOVDB_GRID_OFF_VOXEL_SIZE 608 -#define PNANOVDB_GRID_OFF_GRID_CLASS 632 -#define PNANOVDB_GRID_OFF_GRID_TYPE 636 -#define PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET 640 -#define PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT 648 - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX)); -} 
-PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index)); -} -PNANOVDB_FORCE_INLINE pnanovdb_map_handle_t pnanovdb_grid_get_map(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - pnanovdb_map_handle_t ret; - ret.address = pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAP); - return ret; -} -PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index)); -} -PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE)); -} -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_grid_get_blind_metadata_offset(pnanovdb_buf_t buf, 
pnanovdb_grid_handle_t p) { - return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { - pnanovdb_write_uint64(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); -} - -PNANOVDB_FORCE_INLINE 
pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) -{ - return (major << 21u) | (minor << 10u) | patch_num; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) -{ - return (version >> 21u) & ((1u << 11u) - 1u); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_minor(pnanovdb_uint32_t version) -{ - return (version >> 10u) & ((1u << 11u) - 1u); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint32_t version) -{ - return version & ((1u << 10u) - 1u); -} - -struct pnanovdb_gridblindmetadata_t -{ - pnanovdb_int64_t byte_offset; // 8 bytes, 0 - pnanovdb_uint64_t element_count; // 8 bytes, 8 - pnanovdb_uint32_t flags; // 4 bytes, 16 - pnanovdb_uint32_t semantic; // 4 bytes, 20 - pnanovdb_uint32_t data_class; // 4 bytes, 24 - pnanovdb_uint32_t data_type; // 4 bytes, 28 - pnanovdb_uint32_t name[256 / 4]; // 256 bytes, 32 -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_t) -struct pnanovdb_gridblindmetadata_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) - -#define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 - -#define PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET 0 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT 8 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS 16 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 - -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_byte_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_element_count(pnanovdb_buf_t buf, 
pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_flags(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_class(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_type(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_name(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p, pnanovdb_uint32_t index) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_NAME + 4u * index)); -} - -struct pnanovdb_tree_t -{ - pnanovdb_uint64_t node_offset_leaf; - pnanovdb_uint64_t node_offset_lower; - pnanovdb_uint64_t node_offset_upper; - pnanovdb_uint64_t node_offset_root; - pnanovdb_uint32_t node_count_leaf; - pnanovdb_uint32_t node_count_lower; - pnanovdb_uint32_t node_count_upper; - pnanovdb_uint32_t tile_count_leaf; - pnanovdb_uint32_t tile_count_lower; - pnanovdb_uint32_t tile_count_upper; - pnanovdb_uint64_t voxel_count; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_t) -struct 
pnanovdb_tree_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_handle_t) - -#define PNANOVDB_TREE_SIZE 64 - -#define PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF 0 -#define PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER 8 -#define PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER 16 -#define PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT 24 -#define PNANOVDB_TREE_OFF_NODE_COUNT_LEAF 32 -#define PNANOVDB_TREE_OFF_NODE_COUNT_LOWER 36 -#define PNANOVDB_TREE_OFF_NODE_COUNT_UPPER 40 -#define PNANOVDB_TREE_OFF_TILE_COUNT_LEAF 44 -#define PNANOVDB_TREE_OFF_TILE_COUNT_LOWER 48 -#define PNANOVDB_TREE_OFF_TILE_COUNT_UPPER 52 -#define PNANOVDB_TREE_OFF_VOXEL_COUNT 56 - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); -} -PNANOVDB_FORCE_INLINE void 
pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); -} - -struct pnanovdb_root_t -{ - pnanovdb_coord_t bbox_min; - pnanovdb_coord_t bbox_max; - pnanovdb_uint32_t table_size; - pnanovdb_uint32_t pad1; // background can start here - // background, min, max -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_t) -struct pnanovdb_root_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_handle_t) - -#define PNANOVDB_ROOT_BASE_SIZE 28 - -#define PNANOVDB_ROOT_OFF_BBOX_MIN 0 -#define PNANOVDB_ROOT_OFF_BBOX_MAX 12 -#define PNANOVDB_ROOT_OFF_TABLE_SIZE 24 - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN)); -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t 
p, pnanovdb_uint32_t tile_count) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); -} - -struct pnanovdb_root_tile_t -{ - pnanovdb_uint64_t key; - pnanovdb_int64_t child; // signed byte offset from root to the child node, 0 means it is a constant tile, so use value - pnanovdb_uint32_t state; - pnanovdb_uint32_t pad1; // value can start here - // value -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_t) -struct pnanovdb_root_tile_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_handle_t) - -#define PNANOVDB_ROOT_TILE_BASE_SIZE 20 - -#define PNANOVDB_ROOT_TILE_OFF_KEY 0 -#define PNANOVDB_ROOT_TILE_OFF_CHILD 8 -#define PNANOVDB_ROOT_TILE_OFF_STATE 16 - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_tile_get_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY)); -} -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_root_tile_get_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { - return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { - pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, 
pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); -} - -struct pnanovdb_upper_t -{ - pnanovdb_coord_t bbox_min; - pnanovdb_coord_t bbox_max; - pnanovdb_uint64_t flags; - pnanovdb_uint32_t value_mask[1024]; - pnanovdb_uint32_t child_mask[1024]; - // min, max - // alignas(32) pnanovdb_uint32_t table[]; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_t) -struct pnanovdb_upper_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_handle_t) - -#define PNANOVDB_UPPER_TABLE_COUNT 32768 -#define PNANOVDB_UPPER_BASE_SIZE 8224 - -#define PNANOVDB_UPPER_OFF_BBOX_MIN 0 -#define PNANOVDB_UPPER_OFF_BBOX_MAX 12 -#define PNANOVDB_UPPER_OFF_FLAGS 24 -#define PNANOVDB_UPPER_OFF_VALUE_MASK 32 -#define PNANOVDB_UPPER_OFF_CHILD_MASK 4128 - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN)); -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_upper_get_flags(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_FLAGS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_value_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index) { - pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); - return ((value >> (bit_index & 31u)) & 1) != 0u; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, 
pnanovdb_uint32_t bit_index) { - pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); - return ((value >> (bit_index & 31u)) & 1) != 0u; -} - -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { - pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); - pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); - if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } - if (value) valueMask |= (1u << (bit_index & 31u)); - pnanovdb_write_uint32(buf, addr, valueMask); -} - -struct pnanovdb_lower_t -{ - pnanovdb_coord_t bbox_min; - pnanovdb_coord_t bbox_max; - pnanovdb_uint64_t flags; - pnanovdb_uint32_t value_mask[128]; - pnanovdb_uint32_t child_mask[128]; - // min, max - // alignas(32) pnanovdb_uint32_t table[]; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_t) -struct pnanovdb_lower_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_handle_t) - -#define PNANOVDB_LOWER_TABLE_COUNT 4096 -#define PNANOVDB_LOWER_BASE_SIZE 1056 - -#define PNANOVDB_LOWER_OFF_BBOX_MIN 0 -#define PNANOVDB_LOWER_OFF_BBOX_MAX 12 -#define PNANOVDB_LOWER_OFF_FLAGS 24 -#define PNANOVDB_LOWER_OFF_VALUE_MASK 32 -#define PNANOVDB_LOWER_OFF_CHILD_MASK 544 - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t 
pnanovdb_lower_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN)); -} -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_lower_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_lower_get_flags(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_FLAGS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_value_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { - pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); - return ((value >> (bit_index & 31u)) & 1) != 0u; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { - pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); - return ((value >> (bit_index & 31u)) & 1) != 0u; -} - -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { - pnanovdb_address_t 
addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); - pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); - if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } - if (value) valueMask |= (1u << (bit_index & 31u)); - pnanovdb_write_uint32(buf, addr, valueMask); -} - -struct pnanovdb_leaf_t -{ - pnanovdb_coord_t bbox_min; - pnanovdb_uint32_t bbox_dif_and_flags; - pnanovdb_uint32_t value_mask[16]; - // min, max - // alignas(32) pnanovdb_uint32_t values[]; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_t) -struct pnanovdb_leaf_handle_t { pnanovdb_address_t address; }; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_handle_t) - -#define PNANOVDB_LEAF_TABLE_COUNT 512 -#define PNANOVDB_LEAF_BASE_SIZE 80 - -#define PNANOVDB_LEAF_OFF_BBOX_MIN 0 -#define PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS 12 -#define PNANOVDB_LEAF_OFF_VALUE_MASK 16 - -#define PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS 84 -#define PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM 16 -#define PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM 12 - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_leaf_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { - return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS)); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bit_index) { - pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); - return ((value >> (bit_index & 31u)) & 1) != 0u; -} - -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - 
pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); -} - -struct pnanovdb_grid_type_constants_t -{ - pnanovdb_uint32_t root_off_background; - pnanovdb_uint32_t root_off_min; - pnanovdb_uint32_t root_off_max; - pnanovdb_uint32_t root_off_ave; - pnanovdb_uint32_t root_off_stddev; - pnanovdb_uint32_t root_size; - pnanovdb_uint32_t value_stride_bits; - pnanovdb_uint32_t table_stride; - pnanovdb_uint32_t root_tile_off_value; - pnanovdb_uint32_t root_tile_size; - pnanovdb_uint32_t upper_off_min; - pnanovdb_uint32_t upper_off_max; - pnanovdb_uint32_t upper_off_ave; - pnanovdb_uint32_t upper_off_stddev; - pnanovdb_uint32_t upper_off_table; - pnanovdb_uint32_t upper_size; - pnanovdb_uint32_t lower_off_min; - pnanovdb_uint32_t lower_off_max; - pnanovdb_uint32_t lower_off_ave; - pnanovdb_uint32_t lower_off_stddev; - pnanovdb_uint32_t lower_off_table; - pnanovdb_uint32_t lower_size; - pnanovdb_uint32_t leaf_off_min; - pnanovdb_uint32_t leaf_off_max; - pnanovdb_uint32_t leaf_off_ave; - pnanovdb_uint32_t leaf_off_stddev; - pnanovdb_uint32_t leaf_off_table; - pnanovdb_uint32_t leaf_size; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) - -// The following table with offsets will nedd to be updates as new GridTypes are added in NanoVDB.h -PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = -{ -{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, -{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, -{32, 40, 48, 56, 64, 
96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, -{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, -{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, -{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, -{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, -{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, -{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, -{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, -{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, -{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, -{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, -{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, -{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, -{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, -{28, 32, 
36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, -{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, -{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, -{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, -{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, -{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, -{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, -{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, -{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, -{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, -}; - -// ------------------------------------------------ Basic Lookup ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_gridblindmetadata(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) -{ - pnanovdb_gridblindmetadata_handle_t meta = { grid.address }; - pnanovdb_uint64_t byte_offset = pnanovdb_grid_get_blind_metadata_offset(buf, grid); - meta.address = pnanovdb_address_offset64(meta.address, byte_offset); - meta.address = 
pnanovdb_address_offset_product(meta.address, PNANOVDB_GRIDBLINDMETADATA_SIZE, index); - return meta; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) -{ - pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); - pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); - pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); - return address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_tree_handle_t pnanovdb_grid_get_tree(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid) -{ - pnanovdb_tree_handle_t tree = { grid.address }; - tree.address = pnanovdb_address_offset(tree.address, PNANOVDB_GRID_SIZE); - return tree; -} - -PNANOVDB_FORCE_INLINE pnanovdb_root_handle_t pnanovdb_tree_get_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t tree) -{ - pnanovdb_root_handle_t root = { tree.address }; - pnanovdb_uint64_t byte_offset = pnanovdb_tree_get_node_offset_root(buf, tree); - root.address = pnanovdb_address_offset64(root.address, byte_offset); - return root; -} - -PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root, pnanovdb_uint32_t n) -{ - pnanovdb_root_tile_handle_t tile = { root.address }; - tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); - tile.address = pnanovdb_address_offset_product(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size), n); - return tile; -} - -PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile_zero(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root) -{ - pnanovdb_root_tile_handle_t tile = { root.address }; - tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); - return tile; -} - 
-PNANOVDB_FORCE_INLINE pnanovdb_upper_handle_t pnanovdb_root_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, pnanovdb_root_tile_handle_t tile) -{ - pnanovdb_upper_handle_t upper = { root.address }; - upper.address = pnanovdb_address_offset64(upper.address, pnanovdb_int64_as_uint64(pnanovdb_root_tile_get_child(buf, tile))); - return upper; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_coord_to_key(PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ -#if defined(PNANOVDB_NATIVE_64) - pnanovdb_uint64_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; - pnanovdb_uint64_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; - pnanovdb_uint64_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; - return (ku) | (ju << 21u) | (iu << 42u); -#else - pnanovdb_uint32_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; - pnanovdb_uint32_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; - pnanovdb_uint32_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; - pnanovdb_uint32_t key_x = ku | (ju << 21); - pnanovdb_uint32_t key_y = (iu << 10) | (ju >> 11); - return pnanovdb_uint32_as_uint64(key_x, key_y); -#endif -} - -PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t tile_count = pnanovdb_uint32_as_int32(pnanovdb_root_get_tile_count(buf, root)); - pnanovdb_root_tile_handle_t tile = pnanovdb_root_get_tile_zero(grid_type, root); - pnanovdb_uint64_t key = pnanovdb_coord_to_key(ijk); - for (pnanovdb_uint32_t i = 0u; i < tile_count; i++) - { - if (pnanovdb_uint64_is_equal(key, pnanovdb_root_tile_get_key(buf, tile))) - { - return tile; - } - tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size)); - } - pnanovdb_root_tile_handle_t null_handle = { pnanovdb_address_null() }; - 
return null_handle; -} - -// ----------------------------- Leaf Node --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + - (((PNANOVDB_DEREF(ijk).y & 7) >> 0) << (3)) + - ((PNANOVDB_DEREF(ijk).z & 7) >> 0); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_min); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_max); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_ave); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_stddev); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node, pnanovdb_uint32_t n) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_table) + ((PNANOVDB_GRID_TYPE_GET(grid_type, value_stride_bits) * n) >> 3u); - return pnanovdb_address_offset(node.address, byte_offset); -} - 
-PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); -} - -// ----------------------------- Leaf FP Types Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) -{ - // value_log_bits // 2 3 4 - pnanovdb_uint32_t value_bits = 1u << value_log_bits; // 4 8 16 - pnanovdb_uint32_t value_mask = (1u << value_bits) - 1u; // 0xF 0xFF 0xFFFF - pnanovdb_uint32_t values_per_word_bits = 5u - value_log_bits; // 3 2 1 - pnanovdb_uint32_t values_per_word_mask = (1u << values_per_word_bits) - 1u; // 7 3 1 - - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - float minimum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM)); - float quantum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM)); - pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, ((n >> values_per_word_bits) << 2u))); - pnanovdb_uint32_t value_compressed = (raw >> ((n & values_per_word_mask) << value_log_bits)) & value_mask; - return pnanovdb_uint32_to_float(value_compressed) * quantum + minimum; -} - -PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return pnanovdb_leaf_fp_read_float(buf, address, ijk, 2u); -} - -PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return pnanovdb_leaf_fp_read_float(buf, address, ijk, 3u); -} - -PNANOVDB_FORCE_INLINE float 
pnanovdb_leaf_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return pnanovdb_leaf_fp_read_float(buf, address, ijk, 4u); -} - -PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t bbox_dif_and_flags = pnanovdb_read_uint32(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS)); - pnanovdb_uint32_t flags = bbox_dif_and_flags >> 24u; - pnanovdb_uint32_t value_log_bits = flags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits - return pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); -} - -// ----------------------------- Leaf Index Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) -{ - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) -{ - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) -{ - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) -{ - return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t 
value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); - return pnanovdb_uint64_offset(offset, n); -} - -// ----------------------------- Leaf IndexMask Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_leaf_index_has_stats(buf, leaf); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) -{ - return pnanovdb_leaf_index_get_min_index(buf, min_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) -{ - return pnanovdb_leaf_index_get_max_index(buf, max_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) -{ - return pnanovdb_leaf_index_get_ave_index(buf, ave_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) -{ - return pnanovdb_leaf_index_get_dev_index(buf, dev_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) -{ - pnanovdb_uint32_t word_idx = n >> 5; - pnanovdb_uint32_t bit_idx = n & 31; - pnanovdb_uint32_t val_mask = - pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); - return (val_mask & (1u << bit_idx)) != 0u; -} -PNANOVDB_FORCE_INLINE void 
pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) -{ - pnanovdb_uint32_t word_idx = n >> 5; - pnanovdb_uint32_t bit_idx = n & 31; - pnanovdb_uint32_t val_mask = - pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); - if (v) - { - val_mask = val_mask | (1u << bit_idx); - } - else - { - val_mask = val_mask & ~(1u << bit_idx); - } - pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); -} - -// ----------------------------- Leaf OnIndex Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); - pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( - buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); - return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_uint64_offset( - pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), - pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) -{ - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, 
PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; - pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); - if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) - { - idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); - } - return idx; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) -{ - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; - pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); - if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) - { - idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); - } - return idx; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) -{ - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; - pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); - if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) - { - idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); - } - return idx; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) -{ - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; - pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); - if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) - { - idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); - } - return idx; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t n = 
pnanovdb_leaf_coord_to_offset(ijk); - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; - - pnanovdb_uint32_t word_idx = n >> 6u; - pnanovdb_uint32_t bit_idx = n & 63u; - pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); - pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); - pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); - if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) - { - pnanovdb_uint32_t sum = 0u; - sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); - if (word_idx > 0u) - { - pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); - sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; - } - pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); - value_index = pnanovdb_uint64_offset(offset, sum); - } - return value_index; -} - -// ----------------------------- Leaf OnIndexMask Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_leaf_onindex_get_value_count(buf, leaf); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_leaf_onindex_has_stats(buf, leaf); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) -{ - return pnanovdb_leaf_onindex_get_min_index(buf, min_address); -} 
-PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) -{ - return pnanovdb_leaf_onindex_get_max_index(buf, max_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) -{ - return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) -{ - return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) -{ - pnanovdb_uint32_t word_idx = n >> 5; - pnanovdb_uint32_t bit_idx = n & 31; - pnanovdb_uint32_t val_mask = - pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); - return (val_mask & (1u << bit_idx)) != 0u; -} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) -{ - pnanovdb_uint32_t word_idx = n >> 5; - pnanovdb_uint32_t bit_idx = n & 31; - pnanovdb_uint32_t val_mask = - pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); - if (v) - { - val_mask = val_mask | (1u << bit_idx); - } - else - { - val_mask = val_mask & ~(1u << bit_idx); - } - pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); -} - -// ----------------------------- Leaf PointIndex Specialization --------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t 
pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) -{ - return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) -{ - return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), - (i == 0u ? 0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) -{ - return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), - pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); -} -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) -{ - return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); -} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) -{ - pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i); - pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(addr, 3u)); - if ((i & 1) == 0u) - { - raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF); - } - else - { - raw32 = (raw32 & 0x0000FFFF) | (value << 16u); - } - pnanovdb_write_uint32(buf, addr, raw32); 
-} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) -{ - pnanovdb_uint32_t word_idx = i >> 5; - pnanovdb_uint32_t bit_idx = i & 31; - pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx); - pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr); - val_mask = val_mask | (1u << bit_idx); - pnanovdb_write_uint32(buf, addr, val_mask); -} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) -{ - pnanovdb_leaf_pointindex_set_on(buf, leaf, i); - pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); -} - -// ------------------------------------------------ Lower Node ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) + - (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) + - ((PNANOVDB_DEREF(ijk).z & 127) >> 3); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_min); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_max); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, 
lower_off_ave); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_stddev); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_lower_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) -{ - pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); - return pnanovdb_read_int64(buf, table_address); -} - -PNANOVDB_FORCE_INLINE pnanovdb_leaf_handle_t pnanovdb_lower_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, pnanovdb_uint32_t n) -{ - pnanovdb_leaf_handle_t leaf = { lower.address }; - leaf.address = pnanovdb_address_offset64(leaf.address, pnanovdb_int64_as_uint64(pnanovdb_lower_get_table_child(grid_type, buf, lower, n))); - return leaf; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); - pnanovdb_address_t value_address; - if (pnanovdb_lower_get_child_mask(buf, lower, n)) - { - pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); - 
value_address = pnanovdb_leaf_get_value_address(grid_type, buf, child, ijk); - PNANOVDB_DEREF(level) = 0u; - } - else - { - value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); - PNANOVDB_DEREF(level) = 1u; - } - return value_address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t level; - return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level)); -} - -// ------------------------------------------------ Upper Node ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) + - (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) + - ((PNANOVDB_DEREF(ijk).z & 4095) >> 7); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_min); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_max); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_ave); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t 
pnanovdb_upper_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_stddev); - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; - return pnanovdb_address_offset(node.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_upper_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) -{ - pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); - return pnanovdb_read_int64(buf, bufAddress); -} - -PNANOVDB_FORCE_INLINE pnanovdb_lower_handle_t pnanovdb_upper_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, pnanovdb_uint32_t n) -{ - pnanovdb_lower_handle_t lower = { upper.address }; - lower.address = pnanovdb_address_offset64(lower.address, pnanovdb_int64_as_uint64(pnanovdb_upper_get_table_child(grid_type, buf, upper, n))); - return lower; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); - pnanovdb_address_t value_address; - if (pnanovdb_upper_get_child_mask(buf, upper, n)) - { - pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); - value_address = pnanovdb_lower_get_value_address_and_level(grid_type, buf, child, ijk, level); - } - else - { - value_address = 
pnanovdb_upper_get_table_address(grid_type, buf, upper, n); - PNANOVDB_DEREF(level) = 2u; - } - return value_address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t level; - return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) -{ - pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); - pnanovdb_write_int64(buf, bufAddress, child); -} - -// ------------------------------------------------ Root ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min); - return pnanovdb_address_offset(root.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_max); - return pnanovdb_address_offset(root.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_ave); - return pnanovdb_address_offset(root.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) -{ - 
pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_stddev); - return pnanovdb_address_offset(root.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_tile_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t root_tile) -{ - pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value); - return pnanovdb_address_offset(root_tile.address, byte_offset); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); - pnanovdb_address_t ret; - if (pnanovdb_address_is_null(tile.address)) - { - ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); - PNANOVDB_DEREF(level) = 4u; - } - else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) - { - ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); - PNANOVDB_DEREF(level) = 3u; - } - else - { - pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); - ret = pnanovdb_upper_get_value_address_and_level(grid_type, buf, child, ijk, level); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t level; - return pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, 
PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index) -{ - pnanovdb_uint32_t level; - pnanovdb_address_t address = pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); - PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u; - return address; -} - -PNANOVDB_FORCE_INLINE float pnanovdb_root_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - float ret; - if (level == 0) - { - ret = pnanovdb_leaf_fp4_read_float(buf, address, ijk); - } - else - { - ret = pnanovdb_read_float(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE float pnanovdb_root_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - float ret; - if (level == 0) - { - ret = pnanovdb_leaf_fp8_read_float(buf, address, ijk); - } - else - { - ret = pnanovdb_read_float(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE float pnanovdb_root_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - float ret; - if (level == 0) - { - ret = pnanovdb_leaf_fp16_read_float(buf, address, ijk); - } - else - { - ret = pnanovdb_read_float(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - float ret; - if (level == 0) - { - ret = pnanovdb_leaf_fpn_read_float(buf, address, ijk); - } - else - { - ret = pnanovdb_read_float(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - pnanovdb_uint64_t ret; - if (level == 0) - { - ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk); - } - 
else - { - ret = pnanovdb_read_uint64(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) -{ - pnanovdb_uint64_t ret; - if (level == 0) - { - ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk); - } - else - { - ret = pnanovdb_read_uint64(buf, address); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( - pnanovdb_buf_t buf, - pnanovdb_address_t value_address, - PNANOVDB_IN(pnanovdb_coord_t) ijk, - pnanovdb_uint32_t level, - PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, - PNANOVDB_INOUT(pnanovdb_uint64_t)range_end -) -{ - pnanovdb_uint32_t local_range_begin = 0u; - pnanovdb_uint32_t local_range_end = 0u; - pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u); - if (level == 0) - { - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - // recover leaf address - pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) }; - if (n > 0u) - { - local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u)); - } - local_range_end = pnanovdb_read_uint16(buf, value_address); - offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf); - } - PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin); - PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end); - return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin); -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range( - pnanovdb_buf_t buf, - pnanovdb_grid_type_t value_type, - pnanovdb_address_t value_address, - pnanovdb_address_t blindmetadata_value_address, - PNANOVDB_IN(pnanovdb_coord_t) ijk, - pnanovdb_uint32_t level, - 
PNANOVDB_INOUT(pnanovdb_address_t)address_begin, - PNANOVDB_INOUT(pnanovdb_address_t)address_end -) -{ - pnanovdb_uint64_t range_begin; - pnanovdb_uint64_t range_end; - pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end)); - - pnanovdb_address_t base_address = blindmetadata_value_address; - pnanovdb_uint32_t stride = 12u; // vec3f - if (value_type == PNANOVDB_GRID_TYPE_VEC3U8) - { - stride = 3u; - } - else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16) - { - stride = 6u; - } - PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride); - PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride); - return range_size; -} - -// ------------------------------------------------ ReadAccessor ----------------------------------------------------------- - -struct pnanovdb_readaccessor_t -{ - pnanovdb_coord_t key; - pnanovdb_leaf_handle_t leaf; - pnanovdb_lower_handle_t lower; - pnanovdb_upper_handle_t upper; - pnanovdb_root_handle_t root; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_readaccessor_t) - -PNANOVDB_FORCE_INLINE void pnanovdb_readaccessor_init(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, pnanovdb_root_handle_t root) -{ - PNANOVDB_DEREF(acc).key.x = 0x7FFFFFFF; - PNANOVDB_DEREF(acc).key.y = 0x7FFFFFFF; - PNANOVDB_DEREF(acc).key.z = 0x7FFFFFFF; - PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); - PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); - PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); - PNANOVDB_DEREF(acc).root = root; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached0(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) -{ - if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).leaf.address)) { return PNANOVDB_FALSE; } - if ((dirty & ~((1u << 3) - 1u)) != 0) - { - 
PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); - return PNANOVDB_FALSE; - } - return PNANOVDB_TRUE; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached1(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) -{ - if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).lower.address)) { return PNANOVDB_FALSE; } - if ((dirty & ~((1u << 7) - 1u)) != 0) - { - PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); - return PNANOVDB_FALSE; - } - return PNANOVDB_TRUE; -} -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached2(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) -{ - if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).upper.address)) { return PNANOVDB_FALSE; } - if ((dirty & ~((1u << 12) - 1u)) != 0) - { - PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); - return PNANOVDB_FALSE; - } - return PNANOVDB_TRUE; -} -PNANOVDB_FORCE_INLINE int pnanovdb_readaccessor_computedirty(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - return (PNANOVDB_DEREF(ijk).x ^ PNANOVDB_DEREF(acc).key.x) | (PNANOVDB_DEREF(ijk).y ^ PNANOVDB_DEREF(acc).key.y) | (PNANOVDB_DEREF(ijk).z ^ PNANOVDB_DEREF(acc).key.z); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); - pnanovdb_address_t value_address; - if 
(pnanovdb_lower_get_child_mask(buf, lower, n)) - { - pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); - PNANOVDB_DEREF(acc).leaf = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, child, ijk, acc); - PNANOVDB_DEREF(level) = 0u; - } - else - { - value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); - PNANOVDB_DEREF(level) = 1u; - } - return value_address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t level; - return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) -{ - pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); - pnanovdb_write_int64(buf, table_address, child); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); - pnanovdb_address_t value_address; - if (pnanovdb_upper_get_child_mask(buf, upper, n)) - { - pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); - PNANOVDB_DEREF(acc).lower = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); - } - else - { - 
value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n); - PNANOVDB_DEREF(level) = 2u; - } - return value_address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t level; - return pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, upper, ijk, acc, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); - pnanovdb_address_t ret; - if (pnanovdb_address_is_null(tile.address)) - { - ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); - PNANOVDB_DEREF(level) = 4u; - } - else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) - { - ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); - PNANOVDB_DEREF(level) = 3u; - } - else - { - pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); - PNANOVDB_DEREF(acc).upper = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - ret = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t level; - return 
pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, root, ijk, acc, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) -{ - int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); - - pnanovdb_address_t value_address; - if (pnanovdb_readaccessor_iscached0(acc, dirty)) - { - value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); - PNANOVDB_DEREF(level) = 0u; - } - else if (pnanovdb_readaccessor_iscached1(acc, dirty)) - { - value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc, level); - } - else if (pnanovdb_readaccessor_iscached2(acc, dirty)) - { - value_address = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc, level); - } - else - { - value_address = pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc, level); - } - return value_address; -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - pnanovdb_uint32_t level; - return pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level)); -} - -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index) -{ - pnanovdb_uint32_t level; - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, 
ijk, PNANOVDB_REF(level)); - PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u; - return address; -} - -// ------------------------------------------------ ReadAccessor GetDim ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - return 1u; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); - pnanovdb_uint32_t ret; - if (pnanovdb_lower_get_child_mask(buf, lower, n)) - { - pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); - PNANOVDB_DEREF(acc).leaf = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - ret = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, child, ijk, acc); - } - else - { - ret = (1u << (3u)); // node 0 dim - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); - pnanovdb_uint32_t ret; - if (pnanovdb_upper_get_child_mask(buf, upper, n)) - { - pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); - PNANOVDB_DEREF(acc).lower = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - ret = pnanovdb_lower_get_dim_and_cache(grid_type, buf, child, ijk, acc); - } - else - { - ret = (1u << (4u + 3u)); // node 1 dim - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t 
pnanovdb_root_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); - pnanovdb_uint32_t ret; - if (pnanovdb_address_is_null(tile.address)) - { - ret = 1u << (5u + 4u + 3u); // background, node 2 dim - } - else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) - { - ret = 1u << (5u + 4u + 3u); // tile value, node 2 dim - } - else - { - pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); - PNANOVDB_DEREF(acc).upper = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - ret = pnanovdb_upper_get_dim_and_cache(grid_type, buf, child, ijk, acc); - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_readaccessor_get_dim(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); - - pnanovdb_uint32_t dim; - if (pnanovdb_readaccessor_iscached0(acc, dirty)) - { - dim = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); - } - else if (pnanovdb_readaccessor_iscached1(acc, dirty)) - { - dim = pnanovdb_lower_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc); - } - else if (pnanovdb_readaccessor_iscached2(acc, dirty)) - { - dim = pnanovdb_upper_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc); - } - else - { - dim = pnanovdb_root_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc); - } - return dim; -} - -// ------------------------------------------------ ReadAccessor IsActive ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, 
pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); - return pnanovdb_leaf_get_value_mask(buf, leaf, n); -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); - pnanovdb_bool_t is_active; - if (pnanovdb_lower_get_child_mask(buf, lower, n)) - { - pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); - PNANOVDB_DEREF(acc).leaf = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, child, ijk, acc); - } - else - { - is_active = pnanovdb_lower_get_value_mask(buf, lower, n); - } - return is_active; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); - pnanovdb_bool_t is_active; - if (pnanovdb_upper_get_child_mask(buf, upper, n)) - { - pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); - PNANOVDB_DEREF(acc).lower = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, child, ijk, acc); - } - else - { - is_active = pnanovdb_upper_get_value_mask(buf, upper, n); - } - return is_active; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_root_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) -{ - pnanovdb_root_tile_handle_t tile = 
pnanovdb_root_find_tile(grid_type, buf, root, ijk); - pnanovdb_bool_t is_active; - if (pnanovdb_address_is_null(tile.address)) - { - is_active = PNANOVDB_FALSE; // background - } - else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) - { - pnanovdb_uint32_t state = pnanovdb_root_tile_get_state(buf, tile); - is_active = state != 0u; // tile value - } - else - { - pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); - PNANOVDB_DEREF(acc).upper = child; - PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); - is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, child, ijk, acc); - } - return is_active; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_is_active(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) -{ - int dirty = pnanovdb_readaccessor_computedirty(acc, ijk); - - pnanovdb_bool_t is_active; - if (pnanovdb_readaccessor_iscached0(acc, dirty)) - { - is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc); - } - else if (pnanovdb_readaccessor_iscached1(acc, dirty)) - { - is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc); - } - else if (pnanovdb_readaccessor_iscached2(acc, dirty)) - { - is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc); - } - else - { - is_active = pnanovdb_root_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc); - } - return is_active; -} - -// ------------------------------------------------ Map Transforms ----------------------------------------------------------- - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_vec3_t dst; - float sx = PNANOVDB_DEREF(src).x; - float sy = PNANOVDB_DEREF(src).y; - float sz = 
PNANOVDB_DEREF(src).z; - dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2) + pnanovdb_map_get_vecf(buf, map, 0); - dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5) + pnanovdb_map_get_vecf(buf, map, 1); - dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8) + pnanovdb_map_get_vecf(buf, map, 2); - return dst; -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_vec3_t dst; - float sx = PNANOVDB_DEREF(src).x - pnanovdb_map_get_vecf(buf, map, 0); - float sy = PNANOVDB_DEREF(src).y - pnanovdb_map_get_vecf(buf, map, 1); - float sz = PNANOVDB_DEREF(src).z - pnanovdb_map_get_vecf(buf, map, 2); - dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2); - dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5); - dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8); - return dst; -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_vec3_t dst; - float sx = PNANOVDB_DEREF(src).x; - float sy = PNANOVDB_DEREF(src).y; - float sz = PNANOVDB_DEREF(src).z; - dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2); - dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5); - dst.z = sx * pnanovdb_map_get_matf(buf, map, 
6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8); - return dst; -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_vec3_t dst; - float sx = PNANOVDB_DEREF(src).x; - float sy = PNANOVDB_DEREF(src).y; - float sz = PNANOVDB_DEREF(src).z; - dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2); - dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5); - dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8); - return dst; -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_indexf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); - return pnanovdb_map_apply_inverse(buf, map, src); -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_worldf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); - return pnanovdb_map_apply(buf, map, src); -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_index_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); - return pnanovdb_map_apply_inverse_jacobi(buf, map, src); -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_world_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src) -{ - pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid); - return pnanovdb_map_apply_jacobi(buf, map, src); -} - -// 
------------------------------------------------ DitherLUT ----------------------------------------------------------- - -// This table was generated with -/************** - -static constexpr inline uint32 -SYSwang_inthash(uint32 key) -{ - // From http://www.concentric.net/~Ttwang/tech/inthash.htm - key += ~(key << 16); - key ^= (key >> 5); - key += (key << 3); - key ^= (key >> 13); - key += ~(key << 9); - key ^= (key >> 17); - return key; -} - -static void -ut_initDitherR(float *pattern, float offset, - int x, int y, int z, int res, int goalres) -{ - // These offsets are designed to maximize the difference between - // dither values in nearby voxels within a given 2x2x2 cell, without - // producing axis-aligned artifacts. The are organized in row-major - // order. - static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; - static const float theScale = 0.125F; - int key = (((z << res) + y) << res) + x; - - if (res == goalres) - { - pattern[key] = offset; - return; - } - - // Randomly flip (on each axis) the dithering patterns used by the - // subcells. This key is xor'd with the subcell index below before - // looking up in the dither offset list. - key = SYSwang_inthash(key) & 7; - - x <<= 1; - y <<= 1; - z <<= 1; - - offset *= theScale; - for (int i = 0; i < 8; i++) - ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, - x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); -} - -// This is a compact algorithm that accomplishes essentially the same thing -// as ut_initDither() above. We should eventually switch to use this and -// clean the dead code. -static fpreal32 * -ut_initDitherRecursive(int goalres) -{ - const int nfloat = 1 << (goalres*3); - float *pattern = new float[nfloat]; - ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); - - // This has built an even spacing from 1/nfloat to 1.0. - // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) - // So we do a correction here. 
Note that the earlier calculations are - // done with powers of 2 so are exact, so it does make sense to delay - // the renormalization to this pass. - float correctionterm = nfloat / (nfloat+1.0F); - for (int i = 0; i < nfloat; i++) - pattern[i] *= correctionterm; - return pattern; -} - - theDitherMatrix = ut_initDitherRecursive(3); - - for (int i = 0; i < 512/8; i ++) - { - for (int j = 0; j < 8; j ++) - std::cout << theDitherMatrix[i*8+j] << "f, "; - std::cout << std::endl; - } - - **************/ - -PNANOVDB_STATIC_CONST float pnanovdb_dither_lut[512] = -{ - 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, - 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, - 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, - 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, - 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, - 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, - 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, - 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, - 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, - 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, - 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, - 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, - 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, - 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, - 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, - 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 
0.927875f, - 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, - 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f, - 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, - 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, - 0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, - 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, - 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, - 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, - 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f, - 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, - 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, - 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, - 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, - 0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, - 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, - 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, - 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, - 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, - 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f, - 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, - 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, - 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, - 
0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, - 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f, - 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, - 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, - 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, - 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, - 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, - 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, - 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, - 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, - 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, - 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, - 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, - 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, - 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, - 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, - 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, - 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, - 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, - 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, - 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, - 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f, - 0.894737f, 
0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, - 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f, - 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, - 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, -}; - -PNANOVDB_FORCE_INLINE float pnanovdb_dither_lookup(pnanovdb_bool_t enabled, int offset) -{ - return enabled ? pnanovdb_dither_lut[offset & 511] : 0.5f; -} - -// ------------------------------------------------ HDDA ----------------------------------------------------------- - -#ifdef PNANOVDB_HDDA - -// Comment out to disable this explicit round-off check -#define PNANOVDB_ENFORCE_FORWARD_STEPPING - -#define PNANOVDB_HDDA_FLOAT_MAX 1e38f - -struct pnanovdb_hdda_t -{ - pnanovdb_int32_t dim; - float tmin; - float tmax; - pnanovdb_coord_t voxel; - pnanovdb_coord_t step; - pnanovdb_vec3_t delta; - pnanovdb_vec3_t next; -}; -PNANOVDB_STRUCT_TYPEDEF(pnanovdb_hdda_t) - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_ijk(PNANOVDB_IN(pnanovdb_vec3_t) pos) -{ - pnanovdb_coord_t voxel; - voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)); - voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)); - voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)); - return voxel; -} - -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_voxel(PNANOVDB_IN(pnanovdb_vec3_t) pos, int dim) -{ - pnanovdb_coord_t voxel; - voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)) & (~(dim - 1)); - voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)) & (~(dim - 1)); - voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)) & (~(dim - 1)); - return voxel; -} - -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_hdda_ray_start(PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction) -{ - 
pnanovdb_vec3_t pos = pnanovdb_vec3_add( - pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(tmin)), - PNANOVDB_DEREF(origin) - ); - return pos; -} - -PNANOVDB_FORCE_INLINE void pnanovdb_hdda_init(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, int dim) -{ - PNANOVDB_DEREF(hdda).dim = dim; - PNANOVDB_DEREF(hdda).tmin = tmin; - PNANOVDB_DEREF(hdda).tmax = tmax; - - pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction); - pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); - - PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); - - // x - if (PNANOVDB_DEREF(direction).x == 0.f) - { - PNANOVDB_DEREF(hdda).next.x = PNANOVDB_HDDA_FLOAT_MAX; - PNANOVDB_DEREF(hdda).step.x = 0; - PNANOVDB_DEREF(hdda).delta.x = 0.f; - } - else if (dir_inv.x > 0.f) - { - PNANOVDB_DEREF(hdda).step.x = 1; - PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x + dim - pos.x) * dir_inv.x; - PNANOVDB_DEREF(hdda).delta.x = dir_inv.x; - } - else - { - PNANOVDB_DEREF(hdda).step.x = -1; - PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; - PNANOVDB_DEREF(hdda).delta.x = -dir_inv.x; - } - - // y - if (PNANOVDB_DEREF(direction).y == 0.f) - { - PNANOVDB_DEREF(hdda).next.y = PNANOVDB_HDDA_FLOAT_MAX; - PNANOVDB_DEREF(hdda).step.y = 0; - PNANOVDB_DEREF(hdda).delta.y = 0.f; - } - else if (dir_inv.y > 0.f) - { - PNANOVDB_DEREF(hdda).step.y = 1; - PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y + dim - pos.y) * dir_inv.y; - PNANOVDB_DEREF(hdda).delta.y = dir_inv.y; - } - else - { - PNANOVDB_DEREF(hdda).step.y = -1; - PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; - PNANOVDB_DEREF(hdda).delta.y = -dir_inv.y; - } - - 
// z - if (PNANOVDB_DEREF(direction).z == 0.f) - { - PNANOVDB_DEREF(hdda).next.z = PNANOVDB_HDDA_FLOAT_MAX; - PNANOVDB_DEREF(hdda).step.z = 0; - PNANOVDB_DEREF(hdda).delta.z = 0.f; - } - else if (dir_inv.z > 0.f) - { - PNANOVDB_DEREF(hdda).step.z = 1; - PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z + dim - pos.z) * dir_inv.z; - PNANOVDB_DEREF(hdda).delta.z = dir_inv.z; - } - else - { - PNANOVDB_DEREF(hdda).step.z = -1; - PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; - PNANOVDB_DEREF(hdda).delta.z = -dir_inv.z; - } -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_update(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_IN(pnanovdb_vec3_t) direction, int dim) -{ - if (PNANOVDB_DEREF(hdda).dim == dim) - { - return PNANOVDB_FALSE; - } - PNANOVDB_DEREF(hdda).dim = dim; - - pnanovdb_vec3_t pos = pnanovdb_vec3_add( - pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(PNANOVDB_DEREF(hdda).tmin)), - PNANOVDB_DEREF(origin) - ); - pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); - - PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); - - if (PNANOVDB_DEREF(hdda).step.x != 0) - { - PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; - if (PNANOVDB_DEREF(hdda).step.x > 0) - { - PNANOVDB_DEREF(hdda).next.x += dim * dir_inv.x; - } - } - if (PNANOVDB_DEREF(hdda).step.y != 0) - { - PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; - if (PNANOVDB_DEREF(hdda).step.y > 0) - { - PNANOVDB_DEREF(hdda).next.y += dim * dir_inv.y; - } - } - if (PNANOVDB_DEREF(hdda).step.z != 0) - { - PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; - if (PNANOVDB_DEREF(hdda).step.z > 0) - { 
- PNANOVDB_DEREF(hdda).next.z += dim * dir_inv.z; - } - } - - return PNANOVDB_TRUE; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_step(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda) -{ - pnanovdb_bool_t ret; - if (PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.y && PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.z) - { -#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING - if (PNANOVDB_DEREF(hdda).next.x <= PNANOVDB_DEREF(hdda).tmin) - { - PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.x + 1.0e-6f; - } -#endif - PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.x; - PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.x; - PNANOVDB_DEREF(hdda).voxel.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.x; - ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; - } - else if (PNANOVDB_DEREF(hdda).next.y < PNANOVDB_DEREF(hdda).next.z) - { -#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING - if (PNANOVDB_DEREF(hdda).next.y <= PNANOVDB_DEREF(hdda).tmin) - { - PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.y + 1.0e-6f; - } -#endif - PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.y; - PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.y; - PNANOVDB_DEREF(hdda).voxel.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.y; - ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; - } - else - { -#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING - if (PNANOVDB_DEREF(hdda).next.z <= PNANOVDB_DEREF(hdda).tmin) - { - PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.z + 1.0e-6f; - } -#endif - PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.z; - PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.z; - PNANOVDB_DEREF(hdda).voxel.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.z; - ret = 
PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; - } - return ret; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_ray_clip( - PNANOVDB_IN(pnanovdb_vec3_t) bbox_min, - PNANOVDB_IN(pnanovdb_vec3_t) bbox_max, - PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_INOUT(float) tmin, - PNANOVDB_IN(pnanovdb_vec3_t) direction, PNANOVDB_INOUT(float) tmax -) -{ - pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); - pnanovdb_vec3_t t0 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_min), PNANOVDB_DEREF(origin)), dir_inv); - pnanovdb_vec3_t t1 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_max), PNANOVDB_DEREF(origin)), dir_inv); - pnanovdb_vec3_t tmin3 = pnanovdb_vec3_min(t0, t1); - pnanovdb_vec3_t tmax3 = pnanovdb_vec3_max(t0, t1); - float tnear = pnanovdb_max(tmin3.x, pnanovdb_max(tmin3.y, tmin3.z)); - float tfar = pnanovdb_min(tmax3.x, pnanovdb_min(tmax3.y, tmax3.z)); - pnanovdb_bool_t hit = tnear <= tfar; - PNANOVDB_DEREF(tmin) = pnanovdb_max(PNANOVDB_DEREF(tmin), tnear); - PNANOVDB_DEREF(tmax) = pnanovdb_min(PNANOVDB_DEREF(tmax), tfar); - return hit; -} - -PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_zero_crossing( - pnanovdb_grid_type_t grid_type, - pnanovdb_buf_t buf, - PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, - PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, - PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, - PNANOVDB_INOUT(float) thit, - PNANOVDB_INOUT(float) v -) -{ - pnanovdb_coord_t bbox_min = pnanovdb_root_get_bbox_min(buf, PNANOVDB_DEREF(acc).root); - pnanovdb_coord_t bbox_max = pnanovdb_root_get_bbox_max(buf, PNANOVDB_DEREF(acc).root); - pnanovdb_vec3_t bbox_minf = pnanovdb_coord_to_vec3(bbox_min); - pnanovdb_vec3_t bbox_maxf = pnanovdb_coord_to_vec3(pnanovdb_coord_add(bbox_max, pnanovdb_coord_uniform(1))); - - pnanovdb_bool_t hit = pnanovdb_hdda_ray_clip(PNANOVDB_REF(bbox_minf), PNANOVDB_REF(bbox_maxf), origin, PNANOVDB_REF(tmin), direction, PNANOVDB_REF(tmax)); - 
if (!hit || tmax > 1.0e20f) - { - return PNANOVDB_FALSE; - } - - pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction); - pnanovdb_coord_t ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos)); - - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); - float v0 = pnanovdb_read_float(buf, address); - - pnanovdb_int32_t dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); - pnanovdb_hdda_t hdda; - pnanovdb_hdda_init(PNANOVDB_REF(hdda), origin, tmin, direction, tmax, dim); - while (pnanovdb_hdda_step(PNANOVDB_REF(hdda))) - { - pnanovdb_vec3_t pos_start = pnanovdb_hdda_ray_start(origin, hdda.tmin + 1.0001f, direction); - ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos_start)); - dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); - pnanovdb_hdda_update(PNANOVDB_REF(hdda), origin, direction, dim); - if (hdda.dim > 1 || !pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(ijk))) - { - continue; - } - while (pnanovdb_hdda_step(PNANOVDB_REF(hdda)) && pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(hdda.voxel))) - { - ijk = hdda.voxel; - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); - PNANOVDB_DEREF(v) = pnanovdb_read_float(buf, address); - if (PNANOVDB_DEREF(v) * v0 < 0.f) - { - PNANOVDB_DEREF(thit) = hdda.tmin; - return PNANOVDB_TRUE; - } - } - } - return PNANOVDB_FALSE; -} - -#endif - -#endif // end of NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED + +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file PNanoVDB.h + + \author Andrew Reidmeyer + + \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port + of NanoVDB.h, which is compatible with most graphics APIs. 
+*/ + +#ifndef NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED +#define NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED + +// ------------------------------------------------ Configuration ----------------------------------------------------------- + +// platforms +//#define PNANOVDB_C +//#define PNANOVDB_HLSL +//#define PNANOVDB_GLSL + +// addressing mode +// PNANOVDB_ADDRESS_32 +// PNANOVDB_ADDRESS_64 +#if defined(PNANOVDB_C) +#ifndef PNANOVDB_ADDRESS_32 +#define PNANOVDB_ADDRESS_64 +#endif +#elif defined(PNANOVDB_HLSL) +#ifndef PNANOVDB_ADDRESS_64 +#define PNANOVDB_ADDRESS_32 +#endif +#elif defined(PNANOVDB_GLSL) +#ifndef PNANOVDB_ADDRESS_64 +#define PNANOVDB_ADDRESS_32 +#endif +#endif + +// bounds checking +//#define PNANOVDB_BUF_BOUNDS_CHECK + +// enable HDDA by default on HLSL/GLSL, make explicit on C +#if defined(PNANOVDB_C) +//#define PNANOVDB_HDDA +#ifdef PNANOVDB_HDDA +#ifndef PNANOVDB_CMATH +#define PNANOVDB_CMATH +#endif +#endif +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_HDDA +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_HDDA +#endif + +#ifdef PNANOVDB_CMATH +#ifndef __CUDACC_RTC__ +#include +#endif +#endif + +// ------------------------------------------------ Buffer ----------------------------------------------------------- + +#if defined(PNANOVDB_BUF_CUSTOM) +// NOP +#elif defined(PNANOVDB_C) +#define PNANOVDB_BUF_C +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_BUF_HLSL +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_BUF_GLSL +#endif + +#if defined(PNANOVDB_BUF_C) +#ifndef __CUDACC_RTC__ +#include +#endif +#if defined(__CUDACC__) +#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__ +#elif defined(_WIN32) +#define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline +#else +#define PNANOVDB_BUF_FORCE_INLINE static inline __attribute__((always_inline)) +#endif +typedef struct pnanovdb_buf_t +{ + uint32_t* data; +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words; +#endif +}pnanovdb_buf_t; +PNANOVDB_BUF_FORCE_INLINE pnanovdb_buf_t 
pnanovdb_make_buf(uint32_t* data, uint64_t size_in_words) +{ + pnanovdb_buf_t ret; + ret.data = data; +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + ret.size_in_words = size_in_words; +#endif + return ret; +} +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; +#else + return buf.data[wordaddress]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + return wordaddress64 < size_in_words64 ? data64[wordaddress64] : 0llu; +#else + return data64[wordaddress64]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; +} +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} +#elif defined(PNANOVDB_ADDRESS_64) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? 
buf.data[wordaddress] : 0u; +#else + return buf.data[wordaddress]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + return wordaddress64 < size_in_words64 ? data64[wordaddress64] : 0llu; +#else + return data64[wordaddress64]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; + } +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} +#endif +typedef uint32_t pnanovdb_grid_type_t; +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#elif defined(PNANOVDB_BUF_HLSL) +#if defined(PNANOVDB_ADDRESS_32) +// HLSL buffer handle: a read-only structured buffer of 32-bit words, indexed by byte_offset >> 2 +#define pnanovdb_buf_t StructuredBuffer<uint> +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) +{ + return buf[(byte_offset >> 2u)]; +} +uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) +{ + uint2 ret; + ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void
pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value) +{ + // NOP, by default no write in HLSL +} +#elif defined(PNANOVDB_ADDRESS_64) +// HLSL buffer handle: a read-only structured buffer of 32-bit words; the 64-bit byte offset is narrowed to uint after the shift +#define pnanovdb_buf_t StructuredBuffer<uint> +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + return buf[uint(byte_offset >> 2u)]; +} +uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t ret; + ret = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret = ret + (uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + // NOP, by default no write in HLSL +} +#endif +#define pnanovdb_grid_type_t uint +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#elif defined(PNANOVDB_BUF_GLSL) +struct pnanovdb_buf_t +{ + uint unused; // to satisfy min struct size?
+}; +// GLSL path: the buf argument is a dummy handle; reads index the global pnanovdb_buf_data, which is not declared in this section (presumably provided by the including shader, to be confirmed) +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) +{ + return pnanovdb_buf_data[(byte_offset >> 2u)]; +} +// 64-bit read assembled from two 32-bit reads: x is the word at byte_offset, y the word 4 bytes after it +uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) +{ + uvec2 ret; + ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in GLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) +{ + // NOP, by default no write in GLSL +} +#define pnanovdb_grid_type_t uint +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#endif + +// ------------------------------------------------ Basic Types ----------------------------------------------------------- + +// force inline +#if defined(PNANOVDB_C) +#if defined(__CUDACC__) +#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ +#elif defined(_WIN32) +#define PNANOVDB_FORCE_INLINE static inline __forceinline +#else +#define PNANOVDB_FORCE_INLINE static inline __attribute__((always_inline)) +#endif +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_FORCE_INLINE +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_FORCE_INLINE +#endif + +// struct typedef, static const, inout +// in C, IN/INOUT parameters are pointers and DEREF/REF dereference or take the address; in HLSL/GLSL they are plain or inout values and DEREF/REF expand to the argument itself +#if defined(PNANOVDB_C) +#define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#define PNANOVDB_STATIC_CONST static const +#define PNANOVDB_INOUT(X) X* +#define PNANOVDB_IN(X) const X* +#define PNANOVDB_DEREF(X) (*X) +#define PNANOVDB_REF(X) &X +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST static const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X) X +#define PNANOVDB_DEREF(X) X +#define PNANOVDB_REF(X) X +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X)
X +#define PNANOVDB_DEREF(X) X +#define PNANOVDB_REF(X) X +#endif + +// basic types, type conversion +#if defined(PNANOVDB_C) +#define PNANOVDB_NATIVE_64 +// stdint.h provides the fixed-width integer types behind the pnanovdb_* typedefs below +#ifndef __CUDACC_RTC__ +#include <stdint.h> +#endif +// string.h provides memcpy, the default pnanovdb_memcpy; define PNANOVDB_MEMCPY_CUSTOM to supply your own +#if !defined(PNANOVDB_MEMCPY_CUSTOM) +#ifndef __CUDACC_RTC__ +#include <string.h> +#endif +#define pnanovdb_memcpy memcpy +#endif +typedef uint32_t pnanovdb_uint32_t; +typedef int32_t pnanovdb_int32_t; +typedef int32_t pnanovdb_bool_t; +#define PNANOVDB_FALSE 0 +#define PNANOVDB_TRUE 1 +typedef uint64_t pnanovdb_uint64_t; +typedef int64_t pnanovdb_int64_t; +typedef struct pnanovdb_coord_t +{ + pnanovdb_int32_t x, y, z; +}pnanovdb_coord_t; +typedef struct pnanovdb_vec3_t +{ + float x, y, z; +}pnanovdb_vec3_t; +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return (pnanovdb_int32_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return (pnanovdb_int64_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return (pnanovdb_uint32_t)v; } +// float/double bit-pattern casts: all four directions copy the bytes with pnanovdb_memcpy, because dereferencing a float or double through an integer pointer breaks the C strict-aliasing rules +PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { pnanovdb_uint32_t vu; pnanovdb_memcpy(&vu, &v, sizeof(vu)); return vu; } +PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { pnanovdb_uint64_t vu; pnanovdb_memcpy(&vu, &v, sizeof(vu)); return vu; } +// split a 64-bit value into its low and high 32-bit halves +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t
pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return ((pnanovdb_uint64_t)x); } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#ifdef PNANOVDB_CMATH +PNANOVDB_FORCE_INLINE float pnanovdb_floor(float v) { return floorf(v); } +#endif +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return (pnanovdb_int32_t)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_min(float a, float b) { return a < b ? a : b; } +PNANOVDB_FORCE_INLINE float pnanovdb_max(float a, float b) { return a > b ? 
a : b; } +#elif defined(PNANOVDB_HLSL) +typedef uint pnanovdb_uint32_t; +typedef int pnanovdb_int32_t; +typedef bool pnanovdb_bool_t; +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +typedef int3 pnanovdb_coord_t; +typedef float3 pnanovdb_vec3_t; +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#if defined(PNANOVDB_ADDRESS_32) +typedef uint2 pnanovdb_uint64_t; +typedef int2 pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +#else +typedef 
uint64_t pnanovdb_uint64_t; +typedef int64_t pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint64_t(x); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#endif +#elif defined(PNANOVDB_GLSL) +#define pnanovdb_uint32_t uint +#define pnanovdb_int32_t int +#define pnanovdb_bool_t bool +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +#define pnanovdb_uint64_t uvec2 +#define pnanovdb_int64_t ivec2 +#define pnanovdb_coord_t ivec3 +#define pnanovdb_vec3_t vec3 +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } +pnanovdb_uint64_t 
pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uvec2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#endif + +// ------------------------------------------------ Coord/Vec3 Utilities ----------------------------------------------------------- + +// C path: pnanovdb_vec3_t and pnanovdb_coord_t are plain structs, so each helper below is written out per component +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) +{ + pnanovdb_vec3_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_add(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_sub(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x - b.x; + v.y = a.y - b.y; + v.z = a.z - b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_mul(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x * b.x; + v.y = a.y * b.y; + v.z = a.z * b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_div(const
pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x / b.x; + v.y = a.y / b.y; + v.z = a.z / b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_min(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x < b.x ? a.x : b.x; + v.y = a.y < b.y ? a.y : b.y; + v.z = a.z < b.z ? a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_max(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x > b.x ? a.x : b.x; + v.y = a.y > b.y ? a.y : b.y; + v.z = a.z > b.z ? a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) +{ + pnanovdb_vec3_t v; + v.x = pnanovdb_int32_to_float(coord.x); + v.y = pnanovdb_int32_to_float(coord.y); + v.z = pnanovdb_int32_to_float(coord.z); + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_uniform(const pnanovdb_int32_t a) +{ + pnanovdb_coord_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) +{ + pnanovdb_coord_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +#elif defined(PNANOVDB_HLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return float3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(pnanovdb_coord_t coord) { return float3(coord); } 
+pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return int3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#elif defined(PNANOVDB_GLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return vec3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) { return vec3(coord); } +pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#endif + +// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +{ +#if defined(PNANOVDB_C) +#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) + return __popcnt(value); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) + return __builtin_popcount(value); +#else + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return (value * 0x01010101) >> 24; +#endif +#elif defined(PNANOVDB_HLSL) + return countbits(value); +#elif defined(PNANOVDB_GLSL) + return bitCount(value); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t 
pnanovdb_uint64_countbits(pnanovdb_uint64_t value) +{ + return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); +} + +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + low += b; + if (low < b) + { + high += 1u; + } + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + if (low == 0u) + { + high -= 1u; + } + low -= 1u; + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + return (b >= 32u) ? + (high >> (b - 32)) : + ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; + pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 
1u << (bit_idx - 32u) : 0u; + return pnanovdb_uint32_as_uint64(mask_low, mask_high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return pnanovdb_uint32_as_uint64( + pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), + pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) + ); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; +} + +#else +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return a + b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + return a - 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return pnanovdb_uint64_low(a >> b); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + return 1llu << bit_idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return a & b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return a != 0llu; +} +#endif + +// ------------------------------------------------ Address Type ----------------------------------------------------------- + +#if defined(PNANOVDB_ADDRESS_32) +struct pnanovdb_address_t +{ + pnanovdb_uint32_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} 
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) +{ + pnanovdb_address_t ret = address; + // lose high bits on 32-bit + ret.byte_offset += pnanovdb_uint64_low(byte_offset); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return address.byte_offset & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~mask); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) +{ + return address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#elif defined(PNANOVDB_ADDRESS_64) +struct pnanovdb_address_t +{ + pnanovdb_uint64_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + 
pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint32_as_uint64_low(byte_offset) * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return pnanovdb_uint64_low(address.byte_offset) & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~pnanovdb_uint32_as_uint64_low(mask)); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0llu; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) +{ + return 
address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#endif + +// ------------------------------------------------ High Level Buffer Read ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint32(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_read_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint64(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_read_int32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_read_int64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_int64(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_read_double(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_double(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_coord_t ret; + ret.x = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 0u))); + ret.y = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 4u))); + ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); + ret.y = 
pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); + ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); + return ret; +} + +// reads the 16-bit value at a 2-byte aligned address: load the enclosing 32-bit word, shift the addressed half down by 0 or 16 bits, then mask so the neighbouring half never leaks into the result +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 2) << 3)) & 65535; +} +// reads the 8-bit value at any byte address: load the enclosing 32-bit word, shift the addressed byte down by 0, 8, 16 or 24 bits and mask it +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; +} +// decodes three consecutive 16-bit values as x, y, z, each scaled by 1/65535 and shifted by -0.5 +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 65535.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; + return ret; +} +// decodes three consecutive 8-bit values as x, y, z, each scaled by 1/255 and shifted by -0.5 +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 255.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_read_bit(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t bit_offset) +{ + pnanovdb_address_t word_address = pnanovdb_address_mask_inv(address, 3u); +
pnanovdb_uint32_t bit_index = (pnanovdb_address_mask(address, 3u) << 3u) + bit_offset; + pnanovdb_uint32_t value_word = pnanovdb_buf_read_uint32(buf, word_address.byte_offset); + return ((value_word >> bit_index) & 1) != 0u; +} + +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE short pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return (short)(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_HLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return f16tof32(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_GLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return unpackHalf2x16(raw >> (pnanovdb_address_mask(address, 2) << 3)).x; +} +#endif + +// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) +{ + pnanovdb_buf_write_uint32(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) +{ + pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) +{ + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) +{ + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), PNANOVDB_DEREF(value).x); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); +} + +// ------------------------------------------------ Core Structures ----------------------------------------------------------- + +#define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_GRID 0x314244566f6e614eUL// "NanoVDB1" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_FILE 0x324244566f6e614eUL// "NanoVDB2" in hex - little endian (uint64_t) + +#define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI +#define PNANOVDB_MINOR_VERSION_NUMBER 6// reflects changes to the API but not ABI +#define PNANOVDB_PATCH_VERSION_NUMBER 0// reflects bug-fixes with no ABI or API changes + +#define PNANOVDB_GRID_TYPE_UNKNOWN 0 +#define 
PNANOVDB_GRID_TYPE_FLOAT 1 +#define PNANOVDB_GRID_TYPE_DOUBLE 2 +#define PNANOVDB_GRID_TYPE_INT16 3 +#define PNANOVDB_GRID_TYPE_INT32 4 +#define PNANOVDB_GRID_TYPE_INT64 5 +#define PNANOVDB_GRID_TYPE_VEC3F 6 +#define PNANOVDB_GRID_TYPE_VEC3D 7 +#define PNANOVDB_GRID_TYPE_MASK 8 +#define PNANOVDB_GRID_TYPE_HALF 9 +#define PNANOVDB_GRID_TYPE_UINT32 10 +#define PNANOVDB_GRID_TYPE_BOOLEAN 11 +#define PNANOVDB_GRID_TYPE_RGBA8 12 +#define PNANOVDB_GRID_TYPE_FP4 13 +#define PNANOVDB_GRID_TYPE_FP8 14 +#define PNANOVDB_GRID_TYPE_FP16 15 +#define PNANOVDB_GRID_TYPE_FPN 16 +#define PNANOVDB_GRID_TYPE_VEC4F 17 +#define PNANOVDB_GRID_TYPE_VEC4D 18 +#define PNANOVDB_GRID_TYPE_INDEX 19 +#define PNANOVDB_GRID_TYPE_ONINDEX 20 +#define PNANOVDB_GRID_TYPE_INDEXMASK 21 +#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 +#define PNANOVDB_GRID_TYPE_POINTINDEX 23 +#define PNANOVDB_GRID_TYPE_VEC3U8 24 +#define PNANOVDB_GRID_TYPE_VEC3U16 25 +#define PNANOVDB_GRID_TYPE_END 26 + +#define PNANOVDB_GRID_CLASS_UNKNOWN 0 +#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF +#define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density +#define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity +#define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid +#define PNANOVDB_GRID_CLASS_POINT_DATA 5 // point data grid +#define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) +#define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft +#define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. 
into an external array +#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features +#define PNANOVDB_GRID_CLASS_END 10 + +#define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) +#define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) +#define PNANOVDB_GRID_FLAGS_HAS_MIN_MAX (1 << 2) +#define PNANOVDB_GRID_FLAGS_HAS_AVERAGE (1 << 3) +#define PNANOVDB_GRID_FLAGS_HAS_STD_DEVIATION (1 << 4) +#define PNANOVDB_GRID_FLAGS_IS_BREADTH_FIRST (1 << 5) +#define PNANOVDB_GRID_FLAGS_END (1 << 6) + +#define PNANOVDB_LEAF_TYPE_DEFAULT 0 +#define PNANOVDB_LEAF_TYPE_LITE 1 +#define PNANOVDB_LEAF_TYPE_FP 2 +#define PNANOVDB_LEAF_TYPE_INDEX 3 +#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 +#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 + +// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... +// bit count of values in leaf nodes, i.e. 8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48 }; +// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64 }; +// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48 }; +// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. 
for Vec3f it is 32 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16 }; +// bit count of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32 }; +// one of the 6 leaf types defined above, e.g. PNANOVDB_LEAF_TYPE_INDEX = 3 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0 }; + +struct pnanovdb_map_t +{ + float matf[9]; + float invmatf[9]; + float vecf[3]; + float taperf; + double matd[9]; + double invmatd[9]; + double vecd[3]; + double taperd; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_t) +struct pnanovdb_map_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_handle_t) + +#define PNANOVDB_MAP_SIZE 264 + +#define PNANOVDB_MAP_OFF_MATF 0 +#define PNANOVDB_MAP_OFF_INVMATF 36 +#define PNANOVDB_MAP_OFF_VECF 72 +#define PNANOVDB_MAP_OFF_TAPERF 84 +#define PNANOVDB_MAP_OFF_MATD 88 +#define PNANOVDB_MAP_OFF_INVMATD 160 +#define PNANOVDB_MAP_OFF_VECD 232 +#define PNANOVDB_MAP_OFF_TAPERD 256 + +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_vecf(pnanovdb_buf_t buf,
pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { + pnanovdb_write_float(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); +} + +struct pnanovdb_grid_t +{ + pnanovdb_uint64_t magic; // 8 bytes, 0 + pnanovdb_uint64_t checksum; // 8 bytes, 8 + pnanovdb_uint32_t version; // 4 bytes, 16 + pnanovdb_uint32_t flags; // 4 bytes, 20 + pnanovdb_uint32_t grid_index; // 4 bytes, 24 + pnanovdb_uint32_t grid_count; // 4 bytes, 28 + pnanovdb_uint64_t grid_size; // 8 bytes, 32 + pnanovdb_uint32_t grid_name[256 / 4]; // 256 bytes, 40 + pnanovdb_map_t map; // 264 bytes, 296 + double world_bbox[6]; // 48 bytes, 560 + double voxel_size[3]; // 24 bytes, 608 + pnanovdb_uint32_t grid_class; // 4 bytes, 632 + pnanovdb_uint32_t grid_type; // 4 bytes, 636 + pnanovdb_int64_t blind_metadata_offset; // 8 bytes, 640 + 
pnanovdb_uint32_t blind_metadata_count; // 4 bytes, 648 + pnanovdb_uint32_t pad[5]; // 20 bytes, 652 +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_t) +struct pnanovdb_grid_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_handle_t) + +#define PNANOVDB_GRID_SIZE 672 + +#define PNANOVDB_GRID_OFF_MAGIC 0 +#define PNANOVDB_GRID_OFF_CHECKSUM 8 +#define PNANOVDB_GRID_OFF_VERSION 16 +#define PNANOVDB_GRID_OFF_FLAGS 20 +#define PNANOVDB_GRID_OFF_GRID_INDEX 24 +#define PNANOVDB_GRID_OFF_GRID_COUNT 28 +#define PNANOVDB_GRID_OFF_GRID_SIZE 32 +#define PNANOVDB_GRID_OFF_GRID_NAME 40 +#define PNANOVDB_GRID_OFF_MAP 296 +#define PNANOVDB_GRID_OFF_WORLD_BBOX 560 +#define PNANOVDB_GRID_OFF_VOXEL_SIZE 608 +#define PNANOVDB_GRID_OFF_GRID_CLASS 632 +#define PNANOVDB_GRID_OFF_GRID_TYPE 636 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET 640 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT 648 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX)); +} 
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_map_handle_t pnanovdb_grid_get_map(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + pnanovdb_map_handle_t ret; + ret.address = pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAP); + return ret; +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_grid_get_blind_metadata_offset(pnanovdb_buf_t buf, 
pnanovdb_grid_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { + pnanovdb_write_uint64(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); +} + +PNANOVDB_FORCE_INLINE 
pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) +{ + return (major << 21u) | (minor << 10u) | patch_num; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) +{ + return (version >> 21u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_minor(pnanovdb_uint32_t version) +{ + return (version >> 10u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint32_t version) +{ + return version & ((1u << 10u) - 1u); +} + +struct pnanovdb_gridblindmetadata_t +{ + pnanovdb_int64_t byte_offset; // 8 bytes, 0 + pnanovdb_uint64_t element_count; // 8 bytes, 8 + pnanovdb_uint32_t flags; // 4 bytes, 16 + pnanovdb_uint32_t semantic; // 4 bytes, 20 + pnanovdb_uint32_t data_class; // 4 bytes, 24 + pnanovdb_uint32_t data_type; // 4 bytes, 28 + pnanovdb_uint32_t name[256 / 4]; // 256 bytes, 32 +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_t) +struct pnanovdb_gridblindmetadata_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) + +#define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 + +#define PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET 0 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT 8 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS 16 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_byte_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_element_count(pnanovdb_buf_t buf, 
pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_flags(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_class(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_type(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_name(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_NAME + 4u * index)); +} + +struct pnanovdb_tree_t +{ + pnanovdb_uint64_t node_offset_leaf; + pnanovdb_uint64_t node_offset_lower; + pnanovdb_uint64_t node_offset_upper; + pnanovdb_uint64_t node_offset_root; + pnanovdb_uint32_t node_count_leaf; + pnanovdb_uint32_t node_count_lower; + pnanovdb_uint32_t node_count_upper; + pnanovdb_uint32_t tile_count_leaf; + pnanovdb_uint32_t tile_count_lower; + pnanovdb_uint32_t tile_count_upper; + pnanovdb_uint64_t voxel_count; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_t) +struct 
pnanovdb_tree_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_handle_t) + +#define PNANOVDB_TREE_SIZE 64 + +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF 0 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER 8 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER 16 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT 24 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LEAF 32 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LOWER 36 +#define PNANOVDB_TREE_OFF_NODE_COUNT_UPPER 40 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LEAF 44 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LOWER 48 +#define PNANOVDB_TREE_OFF_TILE_COUNT_UPPER 52 +#define PNANOVDB_TREE_OFF_VOXEL_COUNT 56 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); +} + +struct pnanovdb_root_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint32_t table_size; + pnanovdb_uint32_t pad1; // background can start here + // background, min, max +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_t) +struct pnanovdb_root_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_handle_t) + +#define PNANOVDB_ROOT_BASE_SIZE 28 + +#define PNANOVDB_ROOT_OFF_BBOX_MIN 0 +#define PNANOVDB_ROOT_OFF_BBOX_MAX 12 +#define PNANOVDB_ROOT_OFF_TABLE_SIZE 24 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t 
p, pnanovdb_uint32_t tile_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); +} + +struct pnanovdb_root_tile_t +{ + pnanovdb_uint64_t key; + pnanovdb_int64_t child; // signed byte offset from root to the child node, 0 means it is a constant tile, so use value + pnanovdb_uint32_t state; + pnanovdb_uint32_t pad1; // value can start here + // value +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_t) +struct pnanovdb_root_tile_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_handle_t) + +#define PNANOVDB_ROOT_TILE_BASE_SIZE 20 + +#define PNANOVDB_ROOT_TILE_OFF_KEY 0 +#define PNANOVDB_ROOT_TILE_OFF_CHILD 8 +#define PNANOVDB_ROOT_TILE_OFF_STATE 16 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_tile_get_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_root_tile_get_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { + pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, 
pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); +} + +struct pnanovdb_upper_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[1024]; + pnanovdb_uint32_t child_mask[1024]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_t) +struct pnanovdb_upper_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_handle_t) + +#define PNANOVDB_UPPER_TABLE_COUNT 32768 +#define PNANOVDB_UPPER_BASE_SIZE 8224 + +#define PNANOVDB_UPPER_OFF_BBOX_MIN 0 +#define PNANOVDB_UPPER_OFF_BBOX_MAX 12 +#define PNANOVDB_UPPER_OFF_FLAGS 24 +#define PNANOVDB_UPPER_OFF_VALUE_MASK 32 +#define PNANOVDB_UPPER_OFF_CHILD_MASK 4128 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_upper_get_flags(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_value_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, 
pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_lower_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[128]; + pnanovdb_uint32_t child_mask[128]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_t) +struct pnanovdb_lower_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_handle_t) + +#define PNANOVDB_LOWER_TABLE_COUNT 4096 +#define PNANOVDB_LOWER_BASE_SIZE 1056 + +#define PNANOVDB_LOWER_OFF_BBOX_MIN 0 +#define PNANOVDB_LOWER_OFF_BBOX_MAX 12 +#define PNANOVDB_LOWER_OFF_FLAGS 24 +#define PNANOVDB_LOWER_OFF_VALUE_MASK 32 +#define PNANOVDB_LOWER_OFF_CHILD_MASK 544 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t 
pnanovdb_lower_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_lower_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_lower_get_flags(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_value_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t 
addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_leaf_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_uint32_t bbox_dif_and_flags; + pnanovdb_uint32_t value_mask[16]; + // min, max + // alignas(32) pnanovdb_uint32_t values[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_t) +struct pnanovdb_leaf_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_handle_t) + +#define PNANOVDB_LEAF_TABLE_COUNT 512 +#define PNANOVDB_LEAF_BASE_SIZE 80 + +#define PNANOVDB_LEAF_OFF_BBOX_MIN 0 +#define PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS 12 +#define PNANOVDB_LEAF_OFF_VALUE_MASK 16 + +#define PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS 84 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM 16 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM 12 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_leaf_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + 
pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); +} + +struct pnanovdb_grid_type_constants_t +{ + pnanovdb_uint32_t root_off_background; + pnanovdb_uint32_t root_off_min; + pnanovdb_uint32_t root_off_max; + pnanovdb_uint32_t root_off_ave; + pnanovdb_uint32_t root_off_stddev; + pnanovdb_uint32_t root_size; + pnanovdb_uint32_t value_stride_bits; + pnanovdb_uint32_t table_stride; + pnanovdb_uint32_t root_tile_off_value; + pnanovdb_uint32_t root_tile_size; + pnanovdb_uint32_t upper_off_min; + pnanovdb_uint32_t upper_off_max; + pnanovdb_uint32_t upper_off_ave; + pnanovdb_uint32_t upper_off_stddev; + pnanovdb_uint32_t upper_off_table; + pnanovdb_uint32_t upper_size; + pnanovdb_uint32_t lower_off_min; + pnanovdb_uint32_t lower_off_max; + pnanovdb_uint32_t lower_off_ave; + pnanovdb_uint32_t lower_off_stddev; + pnanovdb_uint32_t lower_off_table; + pnanovdb_uint32_t lower_size; + pnanovdb_uint32_t leaf_off_min; + pnanovdb_uint32_t leaf_off_max; + pnanovdb_uint32_t leaf_off_ave; + pnanovdb_uint32_t leaf_off_stddev; + pnanovdb_uint32_t leaf_off_table; + pnanovdb_uint32_t leaf_size; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) + +// The following table with offsets will nedd to be updates as new GridTypes are added in NanoVDB.h +PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = +{ +{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 
96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, +{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, +{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, +{28, 32, 
36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, +{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, +{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, +{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, +{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, +}; + +// ------------------------------------------------ Basic Lookup ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_gridblindmetadata(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = { grid.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_grid_get_blind_metadata_offset(buf, grid); + meta.address = pnanovdb_address_offset64(meta.address, byte_offset); + meta.address = 
pnanovdb_address_offset_product(meta.address, PNANOVDB_GRIDBLINDMETADATA_SIZE, index); + return meta; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); + pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); + pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); + return address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_tree_handle_t pnanovdb_grid_get_tree(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid) +{ + pnanovdb_tree_handle_t tree = { grid.address }; + tree.address = pnanovdb_address_offset(tree.address, PNANOVDB_GRID_SIZE); + return tree; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_handle_t pnanovdb_tree_get_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t tree) +{ + pnanovdb_root_handle_t root = { tree.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_tree_get_node_offset_root(buf, tree); + root.address = pnanovdb_address_offset64(root.address, byte_offset); + return root; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root, pnanovdb_uint32_t n) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + tile.address = pnanovdb_address_offset_product(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size), n); + return tile; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile_zero(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + return tile; +} + 
+PNANOVDB_FORCE_INLINE pnanovdb_upper_handle_t pnanovdb_root_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, pnanovdb_root_tile_handle_t tile) +{ + pnanovdb_upper_handle_t upper = { root.address }; + upper.address = pnanovdb_address_offset64(upper.address, pnanovdb_int64_as_uint64(pnanovdb_root_tile_get_child(buf, tile))); + return upper; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_coord_to_key(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ +#if defined(PNANOVDB_NATIVE_64) + pnanovdb_uint64_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint64_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint64_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + return (ku) | (ju << 21u) | (iu << 42u); +#else + pnanovdb_uint32_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint32_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint32_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + pnanovdb_uint32_t key_x = ku | (ju << 21); + pnanovdb_uint32_t key_y = (iu << 10) | (ju >> 11); + return pnanovdb_uint32_as_uint64(key_x, key_y); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t tile_count = pnanovdb_uint32_as_int32(pnanovdb_root_get_tile_count(buf, root)); + pnanovdb_root_tile_handle_t tile = pnanovdb_root_get_tile_zero(grid_type, root); + pnanovdb_uint64_t key = pnanovdb_coord_to_key(ijk); + for (pnanovdb_uint32_t i = 0u; i < tile_count; i++) + { + if (pnanovdb_uint64_is_equal(key, pnanovdb_root_tile_get_key(buf, tile))) + { + return tile; + } + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size)); + } + pnanovdb_root_tile_handle_t null_handle = { pnanovdb_address_null() }; + 
return null_handle; +} + +// ----------------------------- Leaf Node --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + + (((PNANOVDB_DEREF(ijk).y & 7) >> 0) << (3)) + + ((PNANOVDB_DEREF(ijk).z & 7) >> 0); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_table) + ((PNANOVDB_GRID_TYPE_GET(grid_type, value_stride_bits) * n) >> 3u); + return pnanovdb_address_offset(node.address, byte_offset); +} + 
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); +} + +// ----------------------------- Leaf FP Types Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) +{ + // value_log_bits // 2 3 4 + pnanovdb_uint32_t value_bits = 1u << value_log_bits; // 4 8 16 + pnanovdb_uint32_t value_mask = (1u << value_bits) - 1u; // 0xF 0xFF 0xFFFF + pnanovdb_uint32_t values_per_word_bits = 5u - value_log_bits; // 3 2 1 + pnanovdb_uint32_t values_per_word_mask = (1u << values_per_word_bits) - 1u; // 7 3 1 + + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + float minimum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM)); + float quantum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM)); + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, ((n >> values_per_word_bits) << 2u))); + pnanovdb_uint32_t value_compressed = (raw >> ((n & values_per_word_mask) << value_log_bits)) & value_mask; + return pnanovdb_uint32_to_float(value_compressed) * quantum + minimum; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 2u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 3u); +} + +PNANOVDB_FORCE_INLINE float 
pnanovdb_leaf_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 4u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t bbox_dif_and_flags = pnanovdb_read_uint32(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS)); + pnanovdb_uint32_t flags = bbox_dif_and_flags >> 24u; + pnanovdb_uint32_t value_log_bits = flags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits + return pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); +} + +// ----------------------------- Leaf Index Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t 
value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + return pnanovdb_uint64_offset(offset, n); +} + +// ----------------------------- Leaf IndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_index_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_index_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_index_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_index_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_index_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf OnIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( + buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); + return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_uint64_offset( + pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), + pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, 
PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = 
pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + + pnanovdb_uint32_t word_idx = n >> 6u; + pnanovdb_uint32_t bit_idx = n & 63u; + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); + pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); + pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) + { + pnanovdb_uint32_t sum = 0u; + sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); + if (word_idx > 0u) + { + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); + sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; + } + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + value_index = pnanovdb_uint64_offset(offset, sum); + } + return value_index; +} + +// ----------------------------- Leaf OnIndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_value_count(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_onindex_get_min_index(buf, min_address); +} 
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_onindex_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf PointIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t 
pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf)
+{
+    return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf));
+}
+// Returns the 64-bit word read from the leaf's "max" slot, which this accessor exposes as the point count.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf)
+{
+    return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf));
+}
+// Returns the leaf's point offset plus the 16-bit table entry of voxel i - 1 (plus zero when i == 0).
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i)
+{
+    return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf),
+        (i == 0u ? 0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u))));
+}
+// Returns the leaf's point offset plus the 16-bit table entry of voxel i.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i)
+{
+    return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf),
+        pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i)));
+}
+// Returns the raw 16-bit table entry of voxel i, widened to 64 bits (no leaf offset added).
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i)
+{
+    return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i)));
+}
+// Stores the low 16 bits of value into table entry i with a 32-bit read-modify-write; the value mask is not touched.
+// The containing word is read and written through the SAME masked address, so the neighbouring 16-bit entry
+// that shares the word is preserved regardless of how the buffer backend treats the low address bits.
+PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value)
+{
+    pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i);
+    // NOTE(review): pnanovdb_address_mask_inv(addr, 3u) is presumed to clear the two low address bits - confirm.
+    pnanovdb_address_t word_addr = pnanovdb_address_mask_inv(addr, 3u);
+    pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, word_addr);
+    if ((i & 1) == 0u)
+    {
+        // even entry: replace the low half, keep the high half
+        raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF);
+    }
+    else
+    {
+        // odd entry: replace the high half, keep the low half
+        raw32 = (raw32 & 0x0000FFFF) | (value << 16u);
+    }
+    // Write back to the word that was read (previously the unmasked entry address was used here).
+    pnanovdb_write_uint32(buf, word_addr, raw32);
+}
+// Sets bit i of the leaf value mask: 32-bit word i >> 5, bit i & 31, starting at PNANOVDB_LEAF_OFF_VALUE_MASK.
+PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i)
+{
+    pnanovdb_uint32_t word_idx = i >> 5;
+    pnanovdb_uint32_t bit_idx = i & 31;
+    pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx);
+    pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr);
+    val_mask = val_mask | (1u << bit_idx);
+    pnanovdb_write_uint32(buf, addr, val_mask);
+}
+// Marks voxel i as on in the value mask, then stores its 16-bit table entry.
+PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value)
+{
+    pnanovdb_leaf_pointindex_set_on(buf, leaf, i);
+    pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value);
+}
+
+// ------------------------------------------------ Lower Node -----------------------------------------------------------
+
+// Maps ijk to a lower-node table index: bits 3..6 of x, y and z packed as (x << 8) + (y << 4) + z.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) +
+        (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) +
+        ((PNANOVDB_DEREF(ijk).z & 127) >> 3);
+}
+
+// Address of the lower node's min field: node address plus the grid type's lower_off_min byte offset.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_min);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the lower node's max field (lower_off_max).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_max);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the lower node's ave field (lower_off_ave).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_ave);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the lower node's stddev field (lower_off_stddev).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_stddev);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of table entry n of the lower node: lower_off_table + table_stride * n bytes past the node address.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n;
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Reads table entry n of the lower node as a signed 64-bit value (used by pnanovdb_lower_get_child as a child offset).
+PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_lower_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n)
+{
+    pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n);
+    return pnanovdb_read_int64(buf, table_address);
+}
+
+// Returns the leaf handle for child n: the lower node's own address advanced by the 64-bit table entry.
+PNANOVDB_FORCE_INLINE pnanovdb_leaf_handle_t pnanovdb_lower_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, pnanovdb_uint32_t n)
+{
+    pnanovdb_leaf_handle_t leaf = { lower.address };
+    leaf.address = pnanovdb_address_offset64(leaf.address, pnanovdb_int64_as_uint64(pnanovdb_lower_get_table_child(grid_type, buf, lower, n)));
+    return leaf;
+}
+
+// Resolves the value address for ijk below a lower node.
+// Writes level = 0 when the address comes from a child leaf, level = 1 when it is the lower node's own table entry.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_address_t value_address;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        value_address = pnanovdb_leaf_get_value_address(grid_type, buf, child, ijk);
+        PNANOVDB_DEREF(level) = 0u;
+    }
+    else
+    {
+        value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(level) = 1u;
+    }
+    return value_address;
+}
+
+// Same lookup as pnanovdb_lower_get_value_address_and_level, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level));
+}
+
+// ------------------------------------------------ Upper Node -----------------------------------------------------------
+
+// Maps ijk to an upper-node table index: bits 7..11 of x, y and z packed as (x << 10) + (y << 5) + z.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) +
+        (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) +
+        ((PNANOVDB_DEREF(ijk).z & 4095) >> 7);
+}
+
+// Address of the upper node's min field (upper_off_min).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_min);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the upper node's max field (upper_off_max).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_max);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the upper node's ave field (upper_off_ave).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_ave);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of the upper node's stddev field (upper_off_stddev).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_stddev);
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Address of table entry n of the upper node: upper_off_table + table_stride * n bytes past the node address.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n;
+    return pnanovdb_address_offset(node.address, byte_offset);
+}
+
+// Reads table entry n of the upper node as a signed 64-bit value (used by pnanovdb_upper_get_child as a child offset).
+PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_upper_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n)
+{
+    pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n);
+    return pnanovdb_read_int64(buf, bufAddress);
+}
+
+// Returns the lower-node handle for child n: the upper node's own address advanced by the 64-bit table entry.
+PNANOVDB_FORCE_INLINE pnanovdb_lower_handle_t pnanovdb_upper_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, pnanovdb_uint32_t n)
+{
+    pnanovdb_lower_handle_t lower = { upper.address };
+    lower.address = pnanovdb_address_offset64(lower.address, pnanovdb_int64_as_uint64(pnanovdb_upper_get_table_child(grid_type, buf, upper, n)));
+    return lower;
+}
+
+// Resolves the value address for ijk below an upper node.
+// Descends into the child lower node when the child mask bit is set (level is then written by that call);
+// otherwise returns the upper node's own table entry and writes level = 2.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_address_t value_address;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        value_address = pnanovdb_lower_get_value_address_and_level(grid_type, buf, child, ijk, level);
+    }
+    else
+    {
+        value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(level) = 2u;
+    }
+    return value_address;
+}
+
+// Same lookup as pnanovdb_upper_get_value_address_and_level, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level));
+}
+
+// Overwrites table entry n of the upper node with the signed 64-bit value child.
+PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child)
+{
+    pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n);
+    pnanovdb_write_int64(buf, bufAddress, child);
+}
+
+// ------------------------------------------------ Root -----------------------------------------------------------
+
+// Address of the root's min field (root_off_min).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min);
+    return pnanovdb_address_offset(root.address, byte_offset);
+}
+
+// Address of the root's max field (root_off_max).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_max);
+    return pnanovdb_address_offset(root.address, byte_offset);
+}
+
+// Address of the root's ave field (root_off_ave).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_ave);
+    return pnanovdb_address_offset(root.address, byte_offset);
+}
+
+// Address of the root's stddev field (root_off_stddev).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_stddev);
+    return pnanovdb_address_offset(root.address, byte_offset);
+}
+
+// Address of a root tile's value field (root_tile_off_value bytes past the tile address).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_tile_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t root_tile)
+{
+    pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value);
+    return pnanovdb_address_offset(root_tile.address, byte_offset);
+}
+
+// Resolves the value address for ijk starting at the root.
+// No tile found: returns the root's background slot and writes level = 4.
+// Tile with a zero child entry: returns the tile's value slot and writes level = 3.
+// Otherwise descends into the child upper node, which writes level (0, 1 or 2).
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_address_t ret;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background));
+        PNANOVDB_DEREF(level) = 4u;
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value));
+        PNANOVDB_DEREF(level) = 3u;
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        ret = pnanovdb_upper_get_value_address_and_level(grid_type, buf, child, ijk, level);
+    }
+    return ret;
+}
+
+// Same lookup as pnanovdb_root_get_value_address_and_level, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level));
+}
+
+// Value-address lookup that also reports a bit index: ijk.x & 7 when the value lives in a leaf (level 0), else 0.
+// NOTE(review): the lower/upper coord_to_offset helpers put z in the low bits of the table index; confirm that
+// x (rather than z) is the intended source of the leaf bit index.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index)
+{
+    pnanovdb_uint32_t level;
+    pnanovdb_address_t address = pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level));
+    PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u;
+    return address;
+}
+
+// Reads a float at a resolved value address: through the fp4 leaf reader at level 0, as a plain float otherwise.
+PNANOVDB_FORCE_INLINE float pnanovdb_root_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    float ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_fp4_read_float(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_float(buf, address);
+    }
+    return ret;
+}
+
+// Reads a float at a resolved value address: through the fp8 leaf reader at level 0, as a plain float otherwise.
+PNANOVDB_FORCE_INLINE float pnanovdb_root_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    float ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_fp8_read_float(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_float(buf, address);
+    }
+    return ret;
+}
+
+// Reads a float at a resolved value address: through the fp16 leaf reader at level 0, as a plain float otherwise.
+PNANOVDB_FORCE_INLINE float pnanovdb_root_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    float ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_fp16_read_float(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_float(buf, address);
+    }
+    return ret;
+}
+
+// Reads a float at a resolved value address: through the fpn leaf reader at level 0, as a plain float otherwise.
+PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    float ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_fpn_read_float(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_float(buf, address);
+    }
+    return ret;
+}
+
+// Returns the value index at a resolved address: via the index leaf helper at level 0, else the stored uint64.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    pnanovdb_uint64_t ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_uint64(buf, address);
+    }
+    return ret;
+}
+
+// Returns the value index at a resolved address: via the onindex leaf helper at level 0, else the stored uint64.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level)
+{
+    pnanovdb_uint64_t ret;
+    if (level == 0)
+    {
+        ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk);
+    }
+    else
+    {
+        ret = pnanovdb_read_uint64(buf, address);
+    }
+    return ret;
+}
+
+// Computes the [range_begin, range_end) point range for the voxel whose POINTINDEX value address was resolved.
+// At level 0 the end is the voxel's own 16-bit entry, the begin is the preceding entry (0 for the first voxel),
+// and both are offset by the owning leaf's point offset. At any other level both outputs are 0.
+// Returns range_end - range_begin widened to 64 bits.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range(
+    pnanovdb_buf_t buf,
+    pnanovdb_address_t value_address,
+    PNANOVDB_IN(pnanovdb_coord_t) ijk,
+    pnanovdb_uint32_t level,
+    PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin,
+    PNANOVDB_INOUT(pnanovdb_uint64_t)range_end
+)
+{
+    pnanovdb_uint32_t local_range_begin = 0u;
+    pnanovdb_uint32_t local_range_end = 0u;
+    pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u);
+    if (level == 0)
+    {
+        pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk);
+        // recover leaf address: step back over the table offset and the n preceding 2-byte entries
+        pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) };
+        if (n > 0u)
+        {
+            local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u));
+        }
+        local_range_end = pnanovdb_read_uint16(buf, value_address);
+        offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf);
+    }
+    PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin);
+    PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end);
+    return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin);
+}
+
+// Converts the point range of a voxel into a pair of byte addresses inside a blind-metadata array.
+// The element stride is 3 bytes for VEC3U8, 6 bytes for VEC3U16 and 12 bytes (vec3f) for every other value_type.
+// Returns the number of points in the range.
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range(
+    pnanovdb_buf_t buf,
+    pnanovdb_grid_type_t value_type,
+    pnanovdb_address_t value_address,
+    pnanovdb_address_t blindmetadata_value_address,
+    PNANOVDB_IN(pnanovdb_coord_t) ijk,
+    pnanovdb_uint32_t level,
+    PNANOVDB_INOUT(pnanovdb_address_t)address_begin,
+    PNANOVDB_INOUT(pnanovdb_address_t)address_end
+)
+{
+    pnanovdb_uint64_t range_begin;
+    pnanovdb_uint64_t range_end;
+    pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end));
+
+    pnanovdb_uint32_t stride = 12u; // vec3f
+    if (value_type == PNANOVDB_GRID_TYPE_VEC3U8)
+    {
+        stride = 3u;
+    }
+    else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16)
+    {
+        stride = 6u;
+    }
+    PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride);
+    PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride);
+    return range_size;
+}
+
+// ------------------------------------------------ ReadAccessor -----------------------------------------------------------
+
+// Cache for tree traversal: the coordinate of the last cached lookup plus the leaf, lower and upper nodes
+// visited on the way to it, and the root to restart from on a miss. A null node address means "not cached".
+struct pnanovdb_readaccessor_t
+{
+    pnanovdb_coord_t key;
+    pnanovdb_leaf_handle_t leaf;
+    pnanovdb_lower_handle_t lower;
+    pnanovdb_upper_handle_t upper;
+    pnanovdb_root_handle_t root;
+};
+PNANOVDB_STRUCT_TYPEDEF(pnanovdb_readaccessor_t)
+
+// Resets the accessor: key set to 0x7FFFFFFF on every axis, all cached node handles null, root stored.
+PNANOVDB_FORCE_INLINE void pnanovdb_readaccessor_init(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, pnanovdb_root_handle_t root)
+{
+    PNANOVDB_DEREF(acc).key.x = 0x7FFFFFFF;
+    PNANOVDB_DEREF(acc).key.y = 0x7FFFFFFF;
+    PNANOVDB_DEREF(acc).key.z = 0x7FFFFFFF;
+    PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null();
+    PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null();
+    PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null();
+    PNANOVDB_DEREF(acc).root = root;
+}
+
+// True when a leaf is cached and dirty has no bits set above the low 3; otherwise the cached leaf is dropped.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached0(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty)
+{
+    if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).leaf.address)) { return PNANOVDB_FALSE; }
+    if ((dirty & ~((1u << 3) - 1u)) != 0)
+    {
+        PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null();
+        return PNANOVDB_FALSE;
+    }
+    return PNANOVDB_TRUE;
+}
+// True when a lower node is cached and dirty has no bits set above the low 7; otherwise the cached node is dropped.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached1(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty)
+{
+    if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).lower.address)) { return PNANOVDB_FALSE; }
+    if ((dirty & ~((1u << 7) - 1u)) != 0)
+    {
+        PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null();
+        return PNANOVDB_FALSE;
+    }
+    return PNANOVDB_TRUE;
+}
+// True when an upper node is cached and dirty has no bits set above the low 12; otherwise the cached node is dropped.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached2(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty)
+{
+    if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).upper.address)) { return PNANOVDB_FALSE; }
+    if ((dirty & ~((1u << 12) - 1u)) != 0)
+    {
+        PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null();
+        return PNANOVDB_FALSE;
+    }
+    return PNANOVDB_TRUE;
+}
+// Returns the OR of the per-axis XOR between ijk and the cached key: every bit in which any axis differs.
+PNANOVDB_FORCE_INLINE int pnanovdb_readaccessor_computedirty(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    return (PNANOVDB_DEREF(ijk).x ^ PNANOVDB_DEREF(acc).key.x) | (PNANOVDB_DEREF(ijk).y ^ PNANOVDB_DEREF(acc).key.y) | (PNANOVDB_DEREF(ijk).z ^ PNANOVDB_DEREF(acc).key.z);
+}
+
+// Leaf end of the cached lookup chain: returns the table address of ijk inside leaf. acc is not modified here.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk);
+    return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n);
+}
+
+// Cached variant of pnanovdb_lower_get_value_address_and_level: when a child leaf exists it is stored in
+// acc.leaf and acc.key is set to ijk before descending (level = 0); otherwise level = 1 and acc is untouched.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_address_t value_address;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(acc).leaf = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, child, ijk, acc);
+        PNANOVDB_DEREF(level) = 0u;
+    }
+    else
+    {
+        value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(level) = 1u;
+    }
+    return value_address;
+}
+
+// Same lookup as pnanovdb_lower_get_value_address_and_level_and_cache, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level));
+}
+
+// Overwrites table entry n of the lower node with the signed 64-bit value child.
+PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child)
+{
+    pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n);
+    pnanovdb_write_int64(buf, table_address, child);
+}
+
+// Cached variant of pnanovdb_upper_get_value_address_and_level: when a child lower node exists it is stored in
+// acc.lower and acc.key is set to ijk before descending; otherwise level = 2 and acc is untouched.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_address_t value_address;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(acc).lower = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level);
+    }
+    else
+    {
+        value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(level) = 2u;
+    }
+    return value_address;
+}
+
+// Same lookup as pnanovdb_upper_get_value_address_and_level_and_cache, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, upper, ijk, acc, PNANOVDB_REF(level));
+}
+
+// Cached variant of pnanovdb_root_get_value_address_and_level: background (level 4) and root-tile (level 3)
+// results leave acc untouched; when the tile has a child, it is stored in acc.upper and acc.key is set to ijk
+// before descending.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_address_t ret;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background));
+        PNANOVDB_DEREF(level) = 4u;
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value));
+        PNANOVDB_DEREF(level) = 3u;
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        PNANOVDB_DEREF(acc).upper = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level);
+    }
+    return ret;
+}
+
+// Same lookup as pnanovdb_root_get_value_address_and_level_and_cache, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, root, ijk, acc, PNANOVDB_REF(level));
+}
+
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{ // Resolves the value address of ijk through the accessor, starting at the deepest cached node that still matches.
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+    // The iscached checks run leaf first, then lower, then upper; a failing check also clears that cache slot.
+    pnanovdb_address_t value_address;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+        PNANOVDB_DEREF(level) = 0u; // leaf hit: the leaf helper does not report a level, so it is set here
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc, level);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        value_address = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc, level);
+    }
+    else
+    {
+        value_address = pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc, level);
+    }
+    return value_address;
+}
+// Same lookup as pnanovdb_readaccessor_get_value_address_and_level, discarding the level.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level));
+}
+// Accessor lookup that also reports a bit index: ijk.x & 7 for a leaf value (level 0), else 0. NOTE(review): lower/upper coord_to_offset put z in the low index bits; confirm x is intended here.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index)
+{
+    pnanovdb_uint32_t level;
+    pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level));
+    PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u;
+    return address;
+}
+
+// ------------------------------------------------ ReadAccessor GetDim -----------------------------------------------------------
+// Leaf end of the get_dim chain: always returns 1; none of the arguments are read.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    return 1u;
+}
+// Returns 1 (via the leaf helper) when ijk has a child leaf, caching that leaf and ijk in acc; otherwise returns 8.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(acc).leaf = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        ret = (1u << (3u)); // node 0 dim (1 << 3 = 8)
+    }
+    return ret;
+}
+// Descends into the child lower node when one exists, caching it and ijk in acc; otherwise returns 128.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(acc).lower = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_lower_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        ret = (1u << (4u + 3u)); // node 1 dim (1 << 7 = 128)
+    }
+    return ret;
+}
+// Returns 4096 for background or a childless root tile; otherwise caches the child upper node and ijk in acc and descends.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        ret = 1u << (5u + 4u + 3u); // background, node 2 dim (1 << 12 = 4096)
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        ret = 1u << (5u + 4u + 3u); // tile value, node 2 dim (1 << 12 = 4096)
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        PNANOVDB_DEREF(acc).upper = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_upper_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    return ret;
+}
+// Accessor entry point for get_dim: dispatches to the deepest cached node that still matches ijk, else to the root.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_readaccessor_get_dim(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+
+    pnanovdb_uint32_t dim;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        dim = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        dim = pnanovdb_lower_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        dim = pnanovdb_upper_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc);
+    }
+    else
+    {
+        dim = pnanovdb_root_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc);
+    }
+    return dim;
+}
+
+// ------------------------------------------------ ReadAccessor IsActive -----------------------------------------------------------
+// Leaf end of the is_active chain: returns the leaf value-mask bit for ijk; acc is not modified here.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk);
+    return pnanovdb_leaf_get_value_mask(buf, leaf, n);
+}
+// Descends into the child leaf when one exists, caching it and ijk in acc; otherwise returns the lower node's value-mask bit.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(acc).leaf = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_lower_get_value_mask(buf, lower, n);
+    }
+    return is_active;
+}
+// Descends into the child lower node when one exists, caching it and ijk in acc; otherwise returns the upper node's value-mask bit.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(acc).lower = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_upper_get_value_mask(buf, upper, n);
+    }
+    return is_active;
+}
+// Root end of the is_active chain: false when no tile is found, the tile's state for a childless tile, else descends and caches.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_root_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        is_active = PNANOVDB_FALSE; // background is always reported inactive
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        pnanovdb_uint32_t state = pnanovdb_root_tile_get_state(buf, tile);
+        is_active = state != 0u; // tile value: active when the tile state is non-zero
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        PNANOVDB_DEREF(acc).upper = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    return is_active;
+}
+// Accessor entry point for is_active: dispatches to the deepest cached node that still matches ijk, else to the root.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_is_active(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_root_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc);
+    }
+    return is_active;
+}
+
+// ------------------------------------------------ Map Transforms -----------------------------------------------------------
+// Affine transform: dst = M * src + v, with M read row by row from matf entries 0..8 and v from vecf entries 0..2.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2) + pnanovdb_map_get_vecf(buf, map, 0);
+    dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5) + pnanovdb_map_get_vecf(buf, map, 1);
+    dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8) + pnanovdb_map_get_vecf(buf, map, 2);
+    return dst;
+}
+// Inverse affine transform: subtracts vecf from src first, then multiplies by the invmatf entries 0..8 row by row.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x - pnanovdb_map_get_vecf(buf, map, 0);
+    float sy = PNANOVDB_DEREF(src).y - pnanovdb_map_get_vecf(buf, map, 1);
+    float sz = PNANOVDB_DEREF(src).z - pnanovdb_map_get_vecf(buf, map, 2);
+    dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8);
+    return dst;
+}
+// Linear part of pnanovdb_map_apply only: multiplies src by the matf entries and adds no translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8);
+    return dst;
+}
+// Linear part of pnanovdb_map_apply_inverse only: multiplies src by the invmatf entries and removes no translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8);
+    return dst;
+}
+// World-to-index position transform: applies the inverse of the grid's map (translation included) to src.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_indexf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_inverse(buf, map, src);
+}
+// Index-to-world position transform: applies the grid's map (translation included) to src.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_worldf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply(buf, map, src);
+}
+// World-to-index direction transform: applies only the inverse matrix of the grid's map, without translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_index_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_inverse_jacobi(buf, map, src);
+}
+// Index-to-world direction transform: applies only the matrix of the grid's map, without translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_world_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_jacobi(buf, map, src);
+}
+
+// ------------------------------------------------ DitherLUT -----------------------------------------------------------
+ +// This table was generated with +/************** + +static constexpr inline uint32 +SYSwang_inthash(uint32 key) +{ + // From http://www.concentric.net/~Ttwang/tech/inthash.htm + key += ~(key << 16); + key ^= (key >> 5); + key += (key << 3); + key ^= (key >> 13); + key += ~(key << 9); + key ^= (key >> 17); + return key; +} + +static void +ut_initDitherR(float *pattern, float offset, + int x, int y, int z, int res, int goalres) +{ + // These offsets are designed to maximize the difference between + // dither values in nearby voxels within a given 2x2x2 cell, without + // producing axis-aligned artifacts. They are organized in row-major + // order. + static const float theDitherOffset[] = {0,4,6,2,5,1,3,7}; + static const float theScale = 0.125F; + int key = (((z << res) + y) << res) + x; + + if (res == goalres) + { + pattern[key] = offset; + return; + } + + // Randomly flip (on each axis) the dithering patterns used by the + // subcells. This key is xor'd with the subcell index below before + // looking up in the dither offset list. + key = SYSwang_inthash(key) & 7; + + x <<= 1; + y <<= 1; + z <<= 1; + + offset *= theScale; + for (int i = 0; i < 8; i++) + ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale, + x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres); +} + +// This is a compact algorithm that accomplishes essentially the same thing +// as ut_initDither() above. We should eventually switch to use this and +// clean the dead code. +static fpreal32 * +ut_initDitherRecursive(int goalres) +{ + const int nfloat = 1 << (goalres*3); + float *pattern = new float[nfloat]; + ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres); + + // This has built an even spacing from 1/nfloat to 1.0. + // However, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1) + // So we do a correction here. Note that the earlier calculations are + // done with powers of 2 so are exact, so it does make sense to delay + // the renormalization to this pass. 
+ float correctionterm = nfloat / (nfloat+1.0F); + for (int i = 0; i < nfloat; i++) + pattern[i] *= correctionterm; + return pattern; +} + + theDitherMatrix = ut_initDitherRecursive(3); + + for (int i = 0; i < 512/8; i ++) + { + for (int j = 0; j < 8; j ++) + std::cout << theDitherMatrix[i*8+j] << "f, "; + std::cout << std::endl; + } + + **************/ + +PNANOVDB_STATIC_CONST float pnanovdb_dither_lut[512] = +{ + 0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f, + 0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f, + 0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f, + 0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f, + 0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f, + 0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f, + 0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f, + 0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f, + 0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f, + 0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f, + 0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f, + 0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f, + 0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f, + 0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f, + 0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f, + 0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f, + 0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f, + 0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 
0.432749f, 0.495127f, 0.994152f, + 0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f, + 0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f, + 0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f, + 0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f, + 0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f, + 0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f, + 0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f, + 0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f, + 0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f, + 0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f, + 0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f, + 0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f, + 0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f, + 0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f, + 0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f, + 0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f, + 0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f, + 0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f, + 0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f, + 0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f, + 0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f, + 0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 
0.0916179f, 0.590643f, + 0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f, + 0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f, + 0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f, + 0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f, + 0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f, + 0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f, + 0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f, + 0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f, + 0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f, + 0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f, + 0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f, + 0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f, + 0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f, + 0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f, + 0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f, + 0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f, + 0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f, + 0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f, + 0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f, + 0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f, + 0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f, + 0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 
0.481481f, + 0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f, + 0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f, +}; + +PNANOVDB_FORCE_INLINE float pnanovdb_dither_lookup(pnanovdb_bool_t enabled, int offset) +{ + return enabled ? pnanovdb_dither_lut[offset & 511] : 0.5f; +} + +// ------------------------------------------------ HDDA ----------------------------------------------------------- + +#ifdef PNANOVDB_HDDA + +// Comment out to disable this explicit round-off check +#define PNANOVDB_ENFORCE_FORWARD_STEPPING + +#define PNANOVDB_HDDA_FLOAT_MAX 1e38f + +struct pnanovdb_hdda_t +{ + pnanovdb_int32_t dim; + float tmin; + float tmax; + pnanovdb_coord_t voxel; + pnanovdb_coord_t step; + pnanovdb_vec3_t delta; + pnanovdb_vec3_t next; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_hdda_t) + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_ijk(PNANOVDB_IN(pnanovdb_vec3_t) pos) +{ + pnanovdb_coord_t voxel; + voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)); + voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)); + voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)); + return voxel; +} + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_voxel(PNANOVDB_IN(pnanovdb_vec3_t) pos, int dim) +{ + pnanovdb_coord_t voxel; + voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)) & (~(dim - 1)); + voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)) & (~(dim - 1)); + voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)) & (~(dim - 1)); + return voxel; +} + +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_hdda_ray_start(PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction) +{ + pnanovdb_vec3_t pos = pnanovdb_vec3_add( + pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(tmin)), + PNANOVDB_DEREF(origin) + ); + return pos; +} 
+ +PNANOVDB_FORCE_INLINE void pnanovdb_hdda_init(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, int dim) +{ + PNANOVDB_DEREF(hdda).dim = dim; + PNANOVDB_DEREF(hdda).tmin = tmin; + PNANOVDB_DEREF(hdda).tmax = tmax; + + pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction); + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + + PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); + + // x + if (PNANOVDB_DEREF(direction).x == 0.f) + { + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.x = 0; + PNANOVDB_DEREF(hdda).delta.x = 0.f; + } + else if (dir_inv.x > 0.f) + { + PNANOVDB_DEREF(hdda).step.x = 1; + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x + dim - pos.x) * dir_inv.x; + PNANOVDB_DEREF(hdda).delta.x = dir_inv.x; + } + else + { + PNANOVDB_DEREF(hdda).step.x = -1; + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; + PNANOVDB_DEREF(hdda).delta.x = -dir_inv.x; + } + + // y + if (PNANOVDB_DEREF(direction).y == 0.f) + { + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.y = 0; + PNANOVDB_DEREF(hdda).delta.y = 0.f; + } + else if (dir_inv.y > 0.f) + { + PNANOVDB_DEREF(hdda).step.y = 1; + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y + dim - pos.y) * dir_inv.y; + PNANOVDB_DEREF(hdda).delta.y = dir_inv.y; + } + else + { + PNANOVDB_DEREF(hdda).step.y = -1; + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; + PNANOVDB_DEREF(hdda).delta.y = -dir_inv.y; + } + + // z + if (PNANOVDB_DEREF(direction).z == 0.f) + { + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_HDDA_FLOAT_MAX; + PNANOVDB_DEREF(hdda).step.z = 0; + 
PNANOVDB_DEREF(hdda).delta.z = 0.f; + } + else if (dir_inv.z > 0.f) + { + PNANOVDB_DEREF(hdda).step.z = 1; + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z + dim - pos.z) * dir_inv.z; + PNANOVDB_DEREF(hdda).delta.z = dir_inv.z; + } + else + { + PNANOVDB_DEREF(hdda).step.z = -1; + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; + PNANOVDB_DEREF(hdda).delta.z = -dir_inv.z; + } +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_update(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_IN(pnanovdb_vec3_t) direction, int dim) +{ + if (PNANOVDB_DEREF(hdda).dim == dim) + { + return PNANOVDB_FALSE; + } + PNANOVDB_DEREF(hdda).dim = dim; + + pnanovdb_vec3_t pos = pnanovdb_vec3_add( + pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(PNANOVDB_DEREF(hdda).tmin)), + PNANOVDB_DEREF(origin) + ); + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + + PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim); + + if (PNANOVDB_DEREF(hdda).step.x != 0) + { + PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x; + if (PNANOVDB_DEREF(hdda).step.x > 0) + { + PNANOVDB_DEREF(hdda).next.x += dim * dir_inv.x; + } + } + if (PNANOVDB_DEREF(hdda).step.y != 0) + { + PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y; + if (PNANOVDB_DEREF(hdda).step.y > 0) + { + PNANOVDB_DEREF(hdda).next.y += dim * dir_inv.y; + } + } + if (PNANOVDB_DEREF(hdda).step.z != 0) + { + PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z; + if (PNANOVDB_DEREF(hdda).step.z > 0) + { + PNANOVDB_DEREF(hdda).next.z += dim * dir_inv.z; + } + } + + return PNANOVDB_TRUE; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t 
pnanovdb_hdda_step(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda) +{ + pnanovdb_bool_t ret; + if (PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.y && PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.z) + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if (PNANOVDB_DEREF(hdda).next.x <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.x + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.x; + PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.x; + PNANOVDB_DEREF(hdda).voxel.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.x; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + else if (PNANOVDB_DEREF(hdda).next.y < PNANOVDB_DEREF(hdda).next.z) + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if (PNANOVDB_DEREF(hdda).next.y <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.y + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.y; + PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.y; + PNANOVDB_DEREF(hdda).voxel.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.y; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + else + { +#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING + if (PNANOVDB_DEREF(hdda).next.z <= PNANOVDB_DEREF(hdda).tmin) + { + PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.z + 1.0e-6f; + } +#endif + PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.z; + PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.z; + PNANOVDB_DEREF(hdda).voxel.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.z; + ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax; + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t 
pnanovdb_hdda_ray_clip( + PNANOVDB_IN(pnanovdb_vec3_t) bbox_min, + PNANOVDB_IN(pnanovdb_vec3_t) bbox_max, + PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_INOUT(float) tmin, + PNANOVDB_IN(pnanovdb_vec3_t) direction, PNANOVDB_INOUT(float) tmax +) +{ + pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction)); + pnanovdb_vec3_t t0 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_min), PNANOVDB_DEREF(origin)), dir_inv); + pnanovdb_vec3_t t1 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_max), PNANOVDB_DEREF(origin)), dir_inv); + pnanovdb_vec3_t tmin3 = pnanovdb_vec3_min(t0, t1); + pnanovdb_vec3_t tmax3 = pnanovdb_vec3_max(t0, t1); + float tnear = pnanovdb_max(tmin3.x, pnanovdb_max(tmin3.y, tmin3.z)); + float tfar = pnanovdb_min(tmax3.x, pnanovdb_min(tmax3.y, tmax3.z)); + pnanovdb_bool_t hit = tnear <= tfar; + PNANOVDB_DEREF(tmin) = pnanovdb_max(PNANOVDB_DEREF(tmin), tnear); + PNANOVDB_DEREF(tmax) = pnanovdb_min(PNANOVDB_DEREF(tmax), tfar); + return hit; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_zero_crossing( + pnanovdb_grid_type_t grid_type, + pnanovdb_buf_t buf, + PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, + PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, + PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, + PNANOVDB_INOUT(float) thit, + PNANOVDB_INOUT(float) v +) +{ + pnanovdb_coord_t bbox_min = pnanovdb_root_get_bbox_min(buf, PNANOVDB_DEREF(acc).root); + pnanovdb_coord_t bbox_max = pnanovdb_root_get_bbox_max(buf, PNANOVDB_DEREF(acc).root); + pnanovdb_vec3_t bbox_minf = pnanovdb_coord_to_vec3(bbox_min); + pnanovdb_vec3_t bbox_maxf = pnanovdb_coord_to_vec3(pnanovdb_coord_add(bbox_max, pnanovdb_coord_uniform(1))); + + pnanovdb_bool_t hit = pnanovdb_hdda_ray_clip(PNANOVDB_REF(bbox_minf), PNANOVDB_REF(bbox_maxf), origin, PNANOVDB_REF(tmin), direction, PNANOVDB_REF(tmax)); + if (!hit || tmax > 1.0e20f) + { + return PNANOVDB_FALSE; + } + + pnanovdb_vec3_t pos = 
pnanovdb_hdda_ray_start(origin, tmin, direction); + pnanovdb_coord_t ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos)); + + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); + float v0 = pnanovdb_read_float(buf, address); + + pnanovdb_int32_t dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); + pnanovdb_hdda_t hdda; + pnanovdb_hdda_init(PNANOVDB_REF(hdda), origin, tmin, direction, tmax, dim); + while (pnanovdb_hdda_step(PNANOVDB_REF(hdda))) + { + pnanovdb_vec3_t pos_start = pnanovdb_hdda_ray_start(origin, hdda.tmin + 1.0001f, direction); + ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos_start)); + dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk))); + pnanovdb_hdda_update(PNANOVDB_REF(hdda), origin, direction, dim); + if (hdda.dim > 1 || !pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(ijk))) + { + continue; + } + while (pnanovdb_hdda_step(PNANOVDB_REF(hdda)) && pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(hdda.voxel))) + { + ijk = hdda.voxel; + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)); + PNANOVDB_DEREF(v) = pnanovdb_read_float(buf, address); + if (PNANOVDB_DEREF(v) * v0 < 0.f) + { + PNANOVDB_DEREF(thit) = hdda.tmin; + return PNANOVDB_TRUE; + } + } + } + return PNANOVDB_FALSE; +} + +#endif + +#endif // end of NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc index ff16ada7e7..5336a07190 100644 --- a/nanovdb/nanovdb/cmd/print/nanovdb_print.cc +++ b/nanovdb/nanovdb/cmd/print/nanovdb_print.cc @@ -148,7 +148,7 @@ int main(int argc, char* argv[]) for (auto& file : fileNames) { auto list = nanovdb::io::readGridMetaData(file); if (!gridName.empty()) { - 
std::vector tmp; + std::vector tmp; for (auto& m : list) { if (nameKey == m.nameKey && gridName == m.gridName) tmp.emplace_back(m); @@ -316,7 +316,7 @@ int main(int argc, char* argv[]) exitStatus = EXIT_FAILURE; } catch (...) { - std::cerr << "Exception oof unexpected type caught" << std::endl; + std::cerr << "Exception of unexpected type caught" << std::endl; exitStatus = EXIT_FAILURE; } diff --git a/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc b/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc index ae70dd310c..faec25aa4d 100644 --- a/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc +++ b/nanovdb/nanovdb/cmd/validate/nanovdb_validate.cc @@ -77,7 +77,7 @@ int main(int argc, char* argv[]) for (auto& file : fileNames) { auto list = nanovdb::io::readGridMetaData(file); if (!gridName.empty()) { - std::vector tmp; + std::vector tmp; for (auto& m : list) { if (nameKey == m.nameKey && gridName == m.gridName) tmp.emplace_back(m); diff --git a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu index 3a78c94093..5bb29979cf 100644 --- a/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_index_grid_cuda/index_grid_cuda_kernel.cu @@ -39,4 +39,4 @@ extern "C" void launch_kernels(const nanovdb::NanoGrid* g gpu_kernel<<<1, 1, 0, stream>>>(gpuGrid); // Launch the device kernel asynchronously cpu_kernel(cpuGrid); // Launch the host "kernel" (synchronously) -} +} \ No newline at end of file diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc index 56021ea80c..633eb5628e 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc @@ -10,7 +10,9 @@ extern "C" void launch_kernels(const nanovdb::NodeManager*, const nanovdb::NodeManager*, cudaStream_t stream); 
-openvdb::FloatGrid::Ptr createLevelSetSphere(); +extern "C" nanovdb::NodeManagerHandle cudaCreateNodeManager(const nanovdb::NanoGrid*); + +//openvdb::FloatGrid::Ptr createLevelSetSphere();// not sure why this is needed /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. int main() @@ -34,9 +36,14 @@ int main() } auto nodeHandle = nanovdb::createNodeManager(*grid); - nodeHandle.deviceUpload(deviceGrid, stream, false); auto *nodeMgr = nodeHandle.template mgr(); +#if 0// this approach copies a NodeManager from host to device + nodeHandle.deviceUpload(deviceGrid, stream, false); auto *deviceNodeMgr = nodeHandle.template deviceMgr(); +#else// the approach below constructs a new NodeManager directly for a device grid + auto nodeHandle2 = cudaCreateNodeManager(deviceGrid); + auto *deviceNodeMgr = nodeHandle2.template deviceMgr(); +#endif if (!deviceNodeMgr || !nodeMgr) { throw std::runtime_error("NodeManagerHandle did not contain a grid with value type float"); } diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu index 97d8703a13..f2fb99ff3f 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu @@ -4,6 +4,7 @@ #include // this defined the core tree data structure of NanoVDB accessable on both the host and device #include #include // required since GridHandle has device code +#include #include // for printf // This is called by the host only @@ -26,4 +27,9 @@ extern "C" void launch_kernels(const nanovdb::NodeManager* deviceMgr, gpu_kernel<<<1, 1, 0, stream>>>(deviceMgr); // Launch the device kernel asynchronously cpu_kernel(cpuMgr); // Launch the host "kernel" (synchronously) +} + +// Simple wrapper that makes sure nanovdb::cudaCreateNodeManager is initiated +extern "C" auto cudaCreateNodeManager(const nanovdb::NanoGrid *d_grid) { + return 
nanovdb::cudaCreateNodeManager(d_grid); } \ No newline at end of file diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index a17417beef..090f33ca9f 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -94,6 +94,60 @@ struct Sphere const nanovdb::Vec3 mCenter; const ValueT mRadius, mVoxelSize, mBackground; }; // Sphere + +class DataBuffer : public std::streambuf +{ +public: + DataBuffer(void* data, size_t size) + { + char* start = static_cast(data); + char* stop = start + size; + this->setg(start, start, stop); + } + + std::iostream::pos_type seekoff(std::iostream::off_type off, std::ios_base::seekdir way, std::ios_base::openmode which) override + { + if (which & std::ios_base::in) + { + if (way == std::ios_base::cur) + { + gbump(off); + } + else if (way == std::ios_base::end) + { + setg(eback(), egptr() + off, egptr()); + } + else if (way == std::ios_base::beg) + { + setg(eback(), eback() + off, egptr()); + } + } + + if (which & std::ios_base::out) + { + if (way == std::ios_base::cur) + { + pbump(off); + } + else if (way == std::ios_base::end) + { + setp(pbase(), epptr()); + pbump(epptr() - pbase() + off); + } + else if (way == std::ios_base::beg) + { + setp(pbase(), epptr()); + pbump(off); + } + } + + return gptr() - eback(); + } + std::iostream::pos_type seekpos(std::iostream::pos_type sp, std::ios_base::openmode which) override + { + return seekoff(sp - std::iostream::pos_type(std::iostream::off_type(0)), std::ios_base::beg, which); + } +}; } // namespace // The fixture for testing class. 
@@ -302,9 +356,9 @@ TEST_F(TestNanoVDB, Basic) d } t; EXPECT_EQ(sizeof(int), sizeof(t)); } - {// Check size of io::MetaData - EXPECT_EQ(176u, sizeof(nanovdb::io::MetaData)); - //std::cerr << "sizeof(MetaData) = " << sizeof(nanovdb::io::MetaData) << std::endl; + {// Check size of io::FileMetaData + EXPECT_EQ(176u, sizeof(nanovdb::io::FileMetaData)); + //std::cerr << "sizeof(FileMetaData) = " << sizeof(nanovdb::io::FileMetaData) << std::endl; } } @@ -392,6 +446,29 @@ TEST_F(TestNanoVDB, Magic) ss2 >> magic; EXPECT_EQ(magic, nanovdb::io::reverseEndianness(NANOVDB_MAGIC_NUMBER)); + + {// test all magic numbers + const std::string a_str("NanoVDB0"), b_str("NanoVDB1"), c_str("NanoVDB2"); + const uint64_t a = NANOVDB_MAGIC_NUMBER;// NanoVDB0 + const uint64_t b = NANOVDB_MAGIC_GRID;// NanoVDB1 + const uint64_t c = NANOVDB_MAGIC_FILE;// NanoVDB2 + const uint64_t m = NANOVDB_MAGIC_MASK;// masks out most significant byte + const char *aa= (const char*)&a, *bb = (const char*)&b, *cc = (const char*)&c; + for (int i=0; i<8; ++i) { + EXPECT_EQ(a_str[i], aa[i]); + EXPECT_EQ(b_str[i], bb[i]); + EXPECT_EQ(c_str[i], cc[i]); + } + for (int i=0; i<7; ++i) { + EXPECT_EQ(aa[i], bb[i]); + EXPECT_EQ(aa[i], cc[i]); + } + EXPECT_EQ('0', aa[7]); + EXPECT_EQ('1', bb[7]); + EXPECT_EQ('2', cc[7]); + EXPECT_EQ(m & a, m & b); + EXPECT_EQ(NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_NUMBER, NANOVDB_MAGIC_MASK & NANOVDB_MAGIC_FILE); + } }// Magic TEST_F(TestNanoVDB, FindBits) @@ -413,31 +490,54 @@ TEST_F(TestNanoVDB, CRC32) { // test function that uses iterators const std::string s{"The quick brown fox jumps over the lazy dog"}; std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32(s.begin(), s.end()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); EXPECT_EQ("414fa339", ss.str()); } { // test the checksum for a modified string const std::string s{"The quick brown Fox jumps over the lazy dog"}; std::stringstream 
ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32(s.begin(), s.end()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); EXPECT_NE("414fa339", ss.str()); } { // test function that uses void pointer and byte size const std::string s{"The quick brown fox jumps over the lazy dog"}; std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32(s.data(), s.size()); + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); EXPECT_EQ("414fa339", ss.str()); } { // test accumulation - nanovdb::CRC32 crc; const std::string s1{"The quick brown fox jum"}; - crc(s1.begin(), s1.end()); + uint32_t crc = nanovdb::crc32::checksum(s1.c_str(), s1.size()); const std::string s2{"ps over the lazy dog"}; - crc(s2.begin(), s2.end()); + crc = nanovdb::crc32::checksum(s2.c_str(), s2.size(), crc); std::stringstream ss; - ss << std::hex << std::setw(8) << std::setfill('0') << crc.checksum(); + ss << std::hex << std::setw(8) << std::setfill('0') << crc; EXPECT_EQ("414fa339", ss.str()); } + { // test accumulation with lookup table + auto lut = nanovdb::crc32::createLut(); + const std::string s1{"The quick brown fox jum"}; + uint32_t crc = nanovdb::crc32::checksum(s1.c_str(), s1.size(), lut.get()); + const std::string s2{"ps over the lazy dog"}; + crc = nanovdb::crc32::checksum(s2.c_str(), s2.size(), lut.get(), crc); + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << crc; + EXPECT_EQ("414fa339", ss.str()); + } + { + EXPECT_EQ(~uint64_t(0), nanovdb::GridChecksum::EMPTY); + nanovdb::GridChecksum cs(~uint64_t(0)); + EXPECT_EQ(nanovdb::ChecksumMode::Disable, cs.mode()); + EXPECT_TRUE(cs.isEmpty()); + EXPECT_FALSE(cs.isFull()); + } + { + nanovdb::GridChecksum cs; + EXPECT_EQ(~uint64_t(0), cs.checksum()); + EXPECT_EQ(nanovdb::ChecksumMode::Disable, cs.mode()); + EXPECT_TRUE(cs.isEmpty()); + EXPECT_FALSE(cs.isFull()); 
+ } } TEST_F(TestNanoVDB, Range1D) @@ -712,6 +812,16 @@ TEST_F(TestNanoVDB, DitherLUT) TEST_F(TestNanoVDB, Traits) { + {// is_same + bool test = nanovdb::is_same::value; + EXPECT_TRUE(test); + test = nanovdb::is_same::value; + EXPECT_FALSE(test); + test = nanovdb::is_same::value; + EXPECT_FALSE(test); + test = nanovdb::is_same::value; + EXPECT_FALSE(test); + } {// float using A = typename nanovdb::BuildToValueMap::Type; bool test = nanovdb::is_same::value; @@ -2812,6 +2922,7 @@ TEST_F(TestNanoVDB, GridBuilderEmpty) EXPECT_EQ(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER), meta->version().getMajor()); EXPECT_EQ(uint32_t(NANOVDB_MINOR_VERSION_NUMBER), meta->version().getMinor()); EXPECT_EQ(uint32_t(NANOVDB_PATCH_VERSION_NUMBER), meta->version().getPatch()); + EXPECT_TRUE(meta->isBreadthFirst()); auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ("test", std::string(dstGrid->gridName())); @@ -2820,6 +2931,12 @@ TEST_F(TestNanoVDB, GridBuilderEmpty) EXPECT_EQ(0.0f, srcAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(0.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); + EXPECT_TRUE(dstGrid->isEmpty()); + EXPECT_TRUE(dstGrid->tree().isEmpty()); + EXPECT_TRUE(dstGrid->tree().root().isEmpty()); + EXPECT_EQ(0u, dstGrid->tree().nodeCount(0)); + EXPECT_EQ(0u, dstGrid->tree().nodeCount(1)); + EXPECT_EQ(0u, dstGrid->tree().nodeCount(2)); EXPECT_EQ(dstGrid->tree().root().minimum(), 0.0f); EXPECT_EQ(dstGrid->tree().root().maximum(), 0.0f); @@ -2855,6 +2972,12 @@ TEST_F(TestNanoVDB, BuilderGridEmpty) EXPECT_EQ(0.0f, srcAcc.getValue(nanovdb::Coord(1, 2, 3))); EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(1, 2, 3))); EXPECT_EQ(0.0f, dstAcc.getValue(nanovdb::Coord(1, 2, 3))); + EXPECT_TRUE(dstGrid->isEmpty()); + EXPECT_TRUE(dstGrid->tree().isEmpty()); + EXPECT_TRUE(dstGrid->tree().root().isEmpty()); + EXPECT_EQ(0u, dstGrid->tree().nodeCount(0)); + EXPECT_EQ(0u, dstGrid->tree().nodeCount(1)); + EXPECT_EQ(0u, 
dstGrid->tree().nodeCount(2)); EXPECT_EQ(dstGrid->tree().root().minimum(), 0.0f); EXPECT_EQ(dstGrid->tree().root().maximum(), 0.0f); @@ -2908,10 +3031,16 @@ TEST_F(TestNanoVDB, CreateNanoGrid_Basic1) EXPECT_NEAR(dstGrid->tree().root().average(), 1.0f, 1e-6); EXPECT_NEAR(dstGrid->tree().root().variance(), 0.0f,1e-6); EXPECT_NEAR(dstGrid->tree().root().stdDeviation(), 0.0f, 1e-6); + EXPECT_FALSE(dstGrid->isEmpty()); + EXPECT_FALSE(dstGrid->tree().isEmpty()); + EXPECT_FALSE(dstGrid->tree().root().isEmpty()); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(2)); } } // GridBuilderBasic1 -TEST_F(TestNanoVDB, CreateNanoGrid_Tile) +TEST_F(TestNanoVDB, CreateNanoGrid_addTile) { { // 1 grid point and 1 tile using SrcGridT = nanovdb::build::Grid; @@ -2961,7 +3090,7 @@ TEST_F(TestNanoVDB, CreateNanoGrid_Tile) EXPECT_NEAR(dstGrid->tree().root().variance(), 0.0f,1e-6); EXPECT_NEAR(dstGrid->tree().root().stdDeviation(), 0.00069f, 1e-6); } -} // GridBuilderTile +} // CreateNanoGrid_addTile TEST_F(TestNanoVDB, GridBuilderValueMask) { @@ -3000,6 +3129,12 @@ TEST_F(TestNanoVDB, GridBuilderValueMask) EXPECT_FALSE(srcAcc.isActive(nanovdb::Coord(2, 2, 3))); EXPECT_EQ(ijk, dstGrid->indexBBox()[0]); EXPECT_EQ(ijk, dstGrid->indexBBox()[1]); + EXPECT_FALSE(dstGrid->isEmpty()); + EXPECT_FALSE(dstGrid->tree().isEmpty()); + EXPECT_FALSE(dstGrid->tree().root().isEmpty()); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(0)); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(1)); + EXPECT_EQ(1u, dstGrid->tree().nodeCount(2)); //EXPECT_EQ(dstGrid->tree().root().minimum(), false);// minimum active value //EXPECT_EQ(dstGrid->tree().root().maximum(), true);// maximum active value //EXPECT_NEAR(dstGrid->tree().root().average(), 1.0f, 1e-6); @@ -3049,6 +3184,9 @@ TEST_F(TestNanoVDB, GridBuilderBasic2) auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_EQ("test", std::string(dstGrid->gridName())); + 
EXPECT_FALSE(dstGrid->isEmpty()); + EXPECT_FALSE(dstGrid->tree().isEmpty()); + EXPECT_FALSE(dstGrid->tree().root().isEmpty()); EXPECT_EQ(2u, dstGrid->activeVoxelCount()); EXPECT_EQ(2u, dstGrid->tree().nodeCount(0)); EXPECT_EQ(2u, dstGrid->tree().nodeCount(1)); @@ -3147,6 +3285,9 @@ TEST_F(TestNanoVDB, GridBuilderPrune) EXPECT_EQ(nanovdb::Coord(0), dstGrid->indexBBox()[0]); EXPECT_EQ(nanovdb::Coord(8*16-1), dstGrid->indexBBox()[1]); + EXPECT_FALSE(dstGrid->isEmpty()); + EXPECT_FALSE(dstGrid->tree().isEmpty()); + EXPECT_FALSE(dstGrid->tree().root().isEmpty()); EXPECT_EQ(0u, dstGrid->tree().nodeCount(0));// all pruned away EXPECT_EQ(0u, dstGrid->tree().nodeCount(1));// all pruned away EXPECT_EQ(1u, dstGrid->tree().nodeCount(2)); @@ -3289,7 +3430,6 @@ TEST_F(TestNanoVDB, GridBuilder_Vec4f) } } // GridBuilder_Vec4f - TEST_F(TestNanoVDB, GridBuilder_Fp4) { using VoxelT = nanovdb::Fp4; @@ -3358,7 +3498,7 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_EQ(nanovdb::Coord(-10,-12,-50), dstGrid->indexBBox()[0]); EXPECT_EQ(nanovdb::Coord( 50, 20, 30), dstGrid->indexBBox()[1]); - auto mgrHandle = nanovdb::createNodeManager(*dstGrid); + auto mgrHandle = nanovdb::createNodeManager(*dstGrid); auto *nodeMgr = mgrHandle.mgr(); EXPECT_TRUE(nanovdb::isValid(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); @@ -3405,10 +3545,24 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) nanovdb::forEach(nanoGrid->indexBBox(), kernel); nanovdb::io::writeGrid("data/sphere_fp4.nvdb", handle); - handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb"); + ASSERT_THROW(nanovdb::io::readGrid("data/sphere_fp4.nvdb", 1), std::runtime_error); + //nanovdb::CpuTimer timer; + //timer.start("read all grids"); + //handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb"); + //timer.start("read first grid"); + handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb", 0); + //timer.stop(); nanoGrid = handle.grid(); EXPECT_TRUE(nanoGrid); + nanovdb::forEach(nanoGrid->indexBBox(), kernel); + //timer.start("read first grid"); + //handle = 
nanovdb::io::readGrid("data/sphere_fp4.nvdb", 0); + //timer.start("read all grids"); + handle = nanovdb::io::readGrid("data/sphere_fp4.nvdb"); + //timer.stop(); + nanoGrid = handle.grid(); + EXPECT_TRUE(nanoGrid); nanovdb::forEach(nanoGrid->indexBBox(), kernel); } } // GridBuilder_Fp4 @@ -5815,6 +5969,12 @@ TEST_F(TestNanoVDB, StencilIntersection) TEST_F(TestNanoVDB, MultiFile) { + { // check nanovdb::io::stringHash + EXPECT_EQ(nanovdb::io::stringHash("generated_id_0"), nanovdb::io::stringHash("generated_id_0")); + EXPECT_NE(nanovdb::io::stringHash("generated_id_0"), nanovdb::io::stringHash("generated_id_1")); + EXPECT_EQ(0u, nanovdb::io::stringHash("\0")); + EXPECT_EQ(0u, nanovdb::io::stringHash(nullptr)); + } std::vector> handles; { // add an int32_t grid nanovdb::build::Grid grid(-1, "Int32 grid"); @@ -7408,6 +7568,26 @@ TEST_F(TestNanoVDB, writeReadUncompressedGrid) EXPECT_EQ(1.0f, fltGrid2->tree().getValue(ijk)); }// writeReadUncompressedGrid +TEST_F(TestNanoVDB, writeReadUncompressedGridRaw) +{ + using GridHandleT = nanovdb::GridHandle; + const nanovdb::Coord ijk(101,0,0); + std::vector handles1; + handles1.emplace_back(nanovdb::createLevelSetSphere()); + EXPECT_EQ(1u, handles1.size()); + auto *fltGrid1 = handles1[0].grid(); + EXPECT_TRUE(fltGrid1); + EXPECT_EQ(1.0f, fltGrid1->tree().getValue(ijk)); + + nanovdb::io::writeUncompressedGrids("data/test1_raw.nvdb", handles1, true); + + auto handles2 = nanovdb::io::readUncompressedGrids("data/test1_raw.nvdb"); + EXPECT_EQ(1u, handles2.size()); + + auto *fltGrid2 = handles2[0].grid(); + EXPECT_TRUE(fltGrid2); + EXPECT_EQ(1.0f, fltGrid2->tree().getValue(ijk)); +}// writeReadUncompressedGridRaw TEST_F(TestNanoVDB, GridMetaData) { @@ -7746,6 +7926,220 @@ TEST_F(TestNanoVDB, mergeSplitGrids) //timer.stop(); }// mergeSplitGrids +TEST_F(TestNanoVDB, writeReadRadGrid) +{ + const nanovdb::Coord ijk(101,0,0); + auto handle1 = nanovdb::createLevelSetSphere(); + auto *fltGrid = handle1.grid(); + EXPECT_TRUE(fltGrid); + 
//std::cerr << "Grid size: " << (fltGrid->gridSize() >> 20) << " MB\n"; + EXPECT_EQ(1.0f, fltGrid->tree().getValue(ijk)); + + {// create an IndexGrid with an internal channel and write it to file + auto handle = nanovdb::createNanoGrid(*fltGrid,1u, true, true);// 1 channel, include stats and tile values + handle.write("data/raw_grid.nvdb"); + } + {// read and test IndexGrid + nanovdb::GridHandle<> handle; + ASSERT_THROW(handle.read("data/merge1.nvdb"), std::logic_error); + } + {// read and test IndexGrid + nanovdb::GridHandle<> tmp; + tmp.read("data/raw_grid.nvdb"); + auto *idxGrid = tmp.grid(); + EXPECT_TRUE(idxGrid); + //std::cerr << "Dense IndexGrid size: " << (idxGrid->gridSize() >> 20) << " MB\n"; + EXPECT_GT(idxGrid->gridSize(), fltGrid->gridSize()); + nanovdb::ChannelAccessor acc(*idxGrid, 0u);// channel ID = 0 + EXPECT_TRUE(acc); + EXPECT_EQ(1.0f, acc(ijk)); + + // compute the gradient from channel ID 0 + nanovdb::GradStencil> stencil(acc); + stencil.moveTo(ijk); + EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient()); + + EXPECT_EQ(0.0f, acc(100,0,0)); + acc(100,0,0) = 1.0f;// legal since acc was template on "float" and not "const float" + EXPECT_EQ(1.0f, acc(100,0,0)); + EXPECT_EQ(nanovdb::Vec3f(1.0f,0.0f,0.0f), stencil.gradient());// since stencil caches + stencil.moveTo(ijk);// re-populates the stencil cache + EXPECT_EQ(nanovdb::Vec3f(0.5f,0.0f,0.0f), stencil.gradient()); + } +}// writeReadRadGrid + +TEST_F(TestNanoVDB, GridHandleIO) +{ + auto handle = nanovdb::createLevelSetSphere(); + EXPECT_TRUE(handle.grid()); + handle.write("data/sphere_raw.nvdb"); + ASSERT_THROW(handle.read("data/dummy_raw.nvdb"), std::ios_base::failure); + ASSERT_THROW(handle.read("data/dummy_raw.nvdb"), std::exception); + handle.read("data/sphere_raw.nvdb"); + auto *grid = handle.grid(); + EXPECT_TRUE(handle.grid()); + handle.read("data/raw_grid.nvdb"); + EXPECT_FALSE(handle.grid()); + EXPECT_TRUE(handle.grid()); + ASSERT_THROW(handle.read("data/merge1.nvdb"), 
std::logic_error); + ASSERT_THROW(handle.read("data/merge1.nvdb"), std::exception); +} + +TEST_F(TestNanoVDB, GridCountAndIndex) +{ + {// create multiple grids and write them to file + std::vector> handles; + handles.emplace_back(nanovdb::createLevelSetSphere()); + handles.emplace_back(nanovdb::createLevelSetSphere()); + handles.emplace_back(nanovdb::createLevelSetSphere()); + EXPECT_EQ(3u, handles.size()); + for (auto &h : handles) EXPECT_EQ(1u, h.gridCount()); + nanovdb::io::writeGrids("data/3_spheres.nvdb", handles); + } + {// default readGrid + auto handle = nanovdb::io::readGrid("data/3_spheres.nvdb"); + EXPECT_EQ(1u, handle.gridCount()); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + {// readGrid one by one + for (uint32_t i=0; i<3u; ++i) { + auto handle = nanovdb::io::readGrid("data/3_spheres.nvdb", i); + EXPECT_EQ(1u, handle.gridCount()); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + } + {// read all grids + auto handle = nanovdb::io::readGrid("data/3_spheres.nvdb", -1); + handle.write("data/3_spheres_raw.nvdb"); + EXPECT_EQ(3u, handle.gridCount()); + for (uint32_t i=0; i(i); + EXPECT_TRUE(grid); + EXPECT_EQ(i, grid->gridIndex()); + EXPECT_EQ(3u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + } + {// read all raw grids + auto handle = nanovdb::io::readGrid("data/3_spheres_raw.nvdb", -1); + handle.write("data/3_spheres_raw.nvdb"); + EXPECT_EQ(3u, handle.gridCount()); + for (uint32_t i=0; i(i); + EXPECT_TRUE(grid); + EXPECT_EQ(i, 
grid->gridIndex()); + EXPECT_EQ(3u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + } + {// read all raw grids + nanovdb::GridHandle<> handle; + handle.read("data/3_spheres_raw.nvdb"); + EXPECT_EQ(3u, handle.gridCount()); + for (uint32_t i=0; i(i); + EXPECT_TRUE(grid); + EXPECT_EQ(i, grid->gridIndex()); + EXPECT_EQ(3u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + } + {// read single raw grid + nanovdb::GridHandle<> handle; + for (uint32_t i=0; i<3u; ++i) { + handle.read("data/3_spheres_raw.nvdb", i); + EXPECT_EQ(1u, handle.gridCount()); + auto *grid = handle.grid(0u); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + ASSERT_THROW(handle.read("data/3_spheres_raw.nvdb", 4), std::runtime_error); + ASSERT_THROW(handle.read("data/3_spheres_raw.nvdb",-1), std::runtime_error); + } + {// read raw grids one by one + for (uint32_t i=0; i<3u; ++i) { + auto handle = nanovdb::io::readGrid("data/3_spheres_raw.nvdb", i); + EXPECT_EQ(1u, handle.gridCount()); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } + ASSERT_THROW(nanovdb::io::readGrid("data/3_spheres_raw.nvdb", 4), std::runtime_error); + } +}// GridCountAndIndex + +TEST_F(TestNanoVDB, CustomStreamIO) +{ + std::ostringstream outputStream(std::ios_base::out | std::ios_base::binary); + { + std::vector> handles; + handles.emplace_back(nanovdb::createLevelSetSphere()); + EXPECT_EQ(1u, handles.size()); + nanovdb::io::writeGrids(outputStream, 
handles, nanovdb::io::Codec::NONE); + } + + std::string payload = outputStream.str(); + std::unique_ptr pool(new uint8_t[payload.length()+NANOVDB_DATA_ALIGNMENT]); + uint8_t *buffer = nanovdb::alignPtr(pool.get()); + std::memcpy(buffer, payload.data(), payload.length()); + DataBuffer dataBuffer(buffer, payload.length()); + std::istream dataStream(&dataBuffer); + { + std::vector> handles = nanovdb::io::readGrids(dataStream); + EXPECT_EQ(1u, handles.size()); + auto *grid = handles[0].grid(0u); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } +}// CustomStreamIO + +TEST_F(TestNanoVDB, CustomStreamGridHandleIO) +{ + std::ostringstream outputStream(std::ios_base::out | std::ios_base::binary); + { + nanovdb::createLevelSetSphere().write(outputStream); + } + + std::string payload = outputStream.str(); + std::unique_ptr pool(new uint8_t[payload.length()+NANOVDB_DATA_ALIGNMENT]); + uint8_t *buffer = nanovdb::alignPtr(pool.get()); + std::memcpy(buffer, payload.data(), payload.length()); + DataBuffer dataBuffer(buffer, payload.length()); + std::istream dataStream(&dataBuffer); + { + nanovdb::GridHandle handle; + handle.read(dataStream); + auto *grid = handle.grid(0u); + EXPECT_TRUE(grid); + EXPECT_EQ(0u, grid->gridIndex()); + EXPECT_EQ(1u, grid->gridCount()); + EXPECT_TRUE(nanovdb::validateChecksum(*grid)); + EXPECT_TRUE(nanovdb::validateChecksum(*grid, nanovdb::ChecksumMode::Full)); + } +}// CustomStreamGridHandleIO + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu index 3cce7a2537..9c2ffe7710 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cu +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -7,11 +7,14 @@ #include #include #include +#include #include #include #include #include 
#include +#include +#include #include #include @@ -77,6 +80,10 @@ void host2device2host(size_t count) for (size_t i=0; i>>(1, [=] __device__ (size_t) { *d_n = cudaStrcmp(d_str, "this is a test #2"); }); @@ -137,8 +144,7 @@ TEST(TestNanoVDBCUDA, Basic_CudaPointsToGrid_float) cudaCheck(cudaMalloc(&d_coords, num_points * sizeof(nanovdb::Coord))); cudaCheck(cudaMemcpy(d_coords, coords, num_points * sizeof(nanovdb::Coord), cudaMemcpyHostToDevice));// CPU -> GPU - nanovdb::CudaPointsToGrid converter; - auto handle = converter.getHandle(d_coords, num_points); + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, num_points); cudaCheck(cudaFree(d_coords)); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU EXPECT_FALSE(handle.data());// no grid was yet allocated on the CPU @@ -536,9 +542,7 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_old) //timer.stop(); //timer.start("Building grid on GPU from "+std::to_string(voxels.size())+" points"); - nanovdb::CudaPointsToGrid converter; - //converter.setVerbose(); - auto handle = converter.getHandle(d_coords, voxelCount); + auto handle = nanovdb::cudaVoxelsToGrid(d_coords, voxelCount, 1.0); //timer.stop(); EXPECT_TRUE(handle.deviceData());// grid only exists on the GPU @@ -951,6 +955,8 @@ TEST(TestNanoVDBCUDA, CudaSignedFloodFill) EXPECT_EQ( 3.0f, acc(103,0,0)); EXPECT_EQ( 0.0f, acc(100,0,0)); EXPECT_EQ(-3.0f, acc( 97,0,0)); + EXPECT_FALSE(floatGrid->isLexicographic()); + EXPECT_TRUE(floatGrid->isBreadthFirst()); }// CudaSignedFloodFill TEST(TestNanoVDBCUDA, OneVoxelToGrid) @@ -1056,6 +1062,8 @@ TEST(TestNanoVDBCUDA, ThreePointsToGrid) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); EXPECT_EQ(1u, grid->blindDataCount()); const Vec3T *blindData = grid->getBlindData(0); EXPECT_TRUE(blindData); @@ -1168,6 +1176,8 @@ TEST(TestNanoVDBCUDA, EightVoxelsToFloatGrid) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); + 
EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); //timer.start("Unit-testing grid on the CPU"); auto acc = grid->getAccessor(); @@ -1252,6 +1262,8 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); //std::cerr << grid->indexBBox() << std::endl; @@ -1365,6 +1377,8 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); EXPECT_EQ(pointCount, grid->pointCount()); EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); @@ -1489,6 +1503,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("World32: Vec3 point coordinates in world space", grid->blindMetaData(0).mName); @@ -1614,6 +1630,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel32: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -1746,6 +1764,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + EXPECT_TRUE(grid->isLexicographic()); + 
EXPECT_FALSE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel16: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -1873,6 +1893,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel8: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -2000,6 +2022,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); + EXPECT_TRUE(grid->isLexicographic()); + EXPECT_FALSE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("PointID: uint32_t indices to points", grid->blindMetaData(0).mName); @@ -2201,3 +2225,304 @@ TEST(TestNanoVDBCUDA, testGridHandleCopy) EXPECT_EQ(-3.0f, acc( 97,0,0)); } } + +TEST(TestNanoVDBCUDA, compareNodeOrdering) +{ + using namespace nanovdb; + const int voxelCount = 4; + Coord coords[voxelCount]={Coord(0,0,0), Coord(256,0,0), Coord(0,0,8), Coord(0,2,4)}; + GridHandle handle1, handle2; + + { + build::FloatGrid grid(0.0f); + auto acc = grid.getAccessor(); + for (int i=0; i(); + EXPECT_TRUE(grid1); + EXPECT_TRUE(grid1->isBreadthFirst()); + EXPECT_FALSE(grid1->isLexicographic()); + + { + Coord *d_coords = nullptr; + cudaCheck(cudaMalloc(&d_coords, voxelCount * sizeof(Coord))); + cudaCheck(cudaMemcpy(d_coords, coords, voxelCount * sizeof(Coord), cudaMemcpyHostToDevice));// CPU -> GPU +#if 0 + auto cudaHandle = cudaVoxelsToGrid(d_coords, voxelCount); +#else + auto cudaHandle = cudaVoxelsToGrid(nanovdb::make_fancy(d_coords), voxelCount); +#endif + cudaCheck(cudaFree(d_coords)); + cudaHandle.deviceDownload(); + handle2 = cudaHandle.copy(); + } + auto grid2 = handle2.grid(); + EXPECT_TRUE(grid2); + 
EXPECT_FALSE(grid2->isBreadthFirst()); + EXPECT_TRUE(grid2->isLexicographic()); + + // Check that both grids have the expected voxel values + for (int i=0; itree().getValue(coords[i])); + EXPECT_EQ(1.0f, grid1->tree().getValue(coords[i])); + } + + // Check that both grids have the same node counts + for (int i=0; i<3; ++i) EXPECT_EQ(grid1->tree().nodeCount(i), grid2->tree().nodeCount(i)); + + {// Check that the order of the leaf nodes are identical + auto *leaf1 = grid1->tree().getFirstLeaf(), *leaf2 = grid2->tree().getFirstLeaf(); + EXPECT_TRUE(leaf1); + EXPECT_TRUE(leaf2); + for (int i=0; itree().nodeCount(0); ++i) { + EXPECT_EQ(leaf1[i].origin(), leaf2[i].origin()); + EXPECT_EQ(leaf1[i].valueMask(), leaf2[i].valueMask()); + } + } + + {// Check that the order of the lower nodes are identical + auto *lower1 = grid1->tree().getFirstLower(), *lower2 = grid2->tree().getFirstLower(); + EXPECT_TRUE(lower1); + EXPECT_TRUE(lower2); + for (int i=0; itree().nodeCount(1); ++i) { + EXPECT_EQ(lower1[i].origin(), lower2[i].origin()); + EXPECT_EQ(lower1[i].valueMask(), lower2[i].valueMask()); + EXPECT_EQ(lower1[i].childMask(), lower2[i].childMask()); + } + } + + {// Check that the order of the upper nodes are identical + auto *upper1 = grid1->tree().getFirstUpper(), *upper2 = grid2->tree().getFirstUpper(); + EXPECT_TRUE(upper1); + EXPECT_TRUE(upper2); + for (int i=0; itree().nodeCount(2); ++i) { + EXPECT_EQ(upper1[i].origin(), upper2[i].origin()); + EXPECT_EQ(upper1[i].valueMask(), upper2[i].valueMask()); + EXPECT_EQ(upper1[i].childMask(), upper2[i].childMask()); + } + } +} + +namespace { +template +void test_ptr(const PtrT ptr) +{ + using T = typename nanovdb::pointer_traits::element_type; + static const bool test = nanovdb::is_same::type>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); + EXPECT_EQ(3.14f, *ptr); + EXPECT_EQ(3.14f, ptr[0]); +} +}// anonymous namespace + +TEST(TestNanoVDBCUDA, fancy_ptr) +{ + 
EXPECT_EQ(sizeof(uint8_t), nanovdb::pointer_traits::element_size); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + EXPECT_EQ(sizeof(uint8_t), nanovdb::pointer_traits>::element_size); + + {// test raw pointer + bool test = nanovdb::is_same::element_type, float>::value; + EXPECT_TRUE(test); + test = nanovdb::is_same::element_type, const float>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits::element_size); + } + {// test std::shared_ptr + bool test = nanovdb::is_same>::element_type, float>::value; + EXPECT_TRUE(test); + test = nanovdb::is_same>::element_type, const float>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + } + {// test std::unique_ptr + bool test = nanovdb::is_same>::element_type, float>::value; + EXPECT_TRUE(test); + test = nanovdb::is_same>::element_type, const float>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + } + {// test fancy_ptr + bool test = nanovdb::is_same>::element_type, const float>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + test = nanovdb::is_same>::element_type, const float>::value; + EXPECT_TRUE(test); + EXPECT_EQ(sizeof(float), nanovdb::pointer_traits>::element_size); + } + + float *p = new float; + *p = 3.14f; + test_ptr(p); + auto q = nanovdb::make_fancy(p); + test_ptr(q); + delete p; +} + +TEST(TestNanoVDBCUDA, CudaGridChecksum) +{ + uint32_t checksum; + const std::string s{"The quick brown fox jumps over the lazy dog"}; + { // test CPU implementation of crc32 without a lookup table + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size()); + 
EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 + } + { // test CPU implementation of crc32 with a lookup table + auto lut = nanovdb::crc32::createLut(); + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << nanovdb::crc32::checksum(s.c_str(), s.size(), lut.get()); + EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 + } + {// test GPU implementation + char* d_str; + uint32_t *d_checksum; + cudaCheck(cudaMalloc((void**)&d_checksum, 4)); + cudaCheck(cudaMalloc((void**)&d_str, s.size())); + cudaCheck(cudaMemcpy(d_str, s.data(), s.size(), cudaMemcpyHostToDevice)); + nanovdb::crc32::checksumKernel<<<1, 1>>>((const uint8_t*)d_str, d_checksum, 1, s.size()); + cudaCheck(cudaMemcpy(&checksum, d_checksum, 4, cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_str)); + cudaCheck(cudaFree(d_checksum)); + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << checksum; + EXPECT_EQ("414fa339", ss.str());// 414FA339 from https://rosettagit.org/drafts/crc-32/#c-1 + } + auto handle = nanovdb::createLevelSetSphere(100); + EXPECT_TRUE(handle.data()); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + handle.deviceUpload(); + EXPECT_TRUE(handle.deviceData()); +#if 0// entire grid or just GridData+TreeData+RootData + const size_t size = handle.size(); +#else + const uint64_t size = grid->memUsage() + grid->tree().memUsage() + grid->tree().root().memUsage() - 16; +#endif + //std::cerr << "Grid + tree + root data is " << size << " bytes\n"; + nanovdb::CpuTimer cpuTimer; + nanovdb::GpuTimer gpuTimer; + {//benchmark CPU version that uses a table + //cpuTimer.start("CPU Tabled CRC of level set sphere"); + auto lut = nanovdb::crc32::createLut(); + checksum = nanovdb::crc32::checksum(handle.data()+16, size, lut.get()); + //cpuTimer.stop(); + //std::cerr << checksum << std::endl; + } + {//benchmark CPU version that uses no table + //cpuTimer.start("CPU Untabled 
CRC of level set sphere"); + auto checksum2 = nanovdb::crc32::checksum(handle.data()+16, size); + //cpuTimer.stop(); + //std::cerr << checksum2 << std::endl; + EXPECT_EQ(checksum, checksum2); + } + {//benchmark CPU version that uses table + //cpuTimer.start("CPU tabled crc32::CRC of level set sphere"); + auto lut = nanovdb::crc32::createLut(); + auto checksum2 = nanovdb::crc32::checksum(handle.data()+16, size, lut.get()); + //cpuTimer.stop(); + //std::cerr << checksum2 << std::endl; + EXPECT_EQ(checksum, checksum2); + } + uint32_t checksum2, *d_checksum; + cudaCheck(cudaMalloc((void**)&d_checksum, 4)); + {//benchmark GPU version that uses no table + //gpuTimer.start("GPU Untabled CRC of level set sphere"); + nanovdb::crc32::checksumKernel<<<1, 1>>>(handle.deviceData()+16, d_checksum, 1, size); + //gpuTimer.stop(); + cudaCheck(cudaMemcpy(&checksum2, d_checksum, 4, cudaMemcpyDeviceToHost)); + //std::cerr << checksum2 << std::endl; + EXPECT_EQ(checksum, checksum2); + } + {//benchmark GPU version that uses a table + //gpuTimer.start("GPU tabled CRC of level set sphere"); + uint32_t *d_lut = nanovdb::crc32::cudaCreateLut(); + nanovdb::crc32::checksumKernel<<<1, 1>>>(handle.deviceData()+16, d_checksum, 1, size, d_lut); + //gpuTimer.stop(); + cudaCheck(cudaMemcpy(&checksum2, d_checksum, 4, cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_lut)); + //std::cerr << checksum2 << std::endl; + EXPECT_EQ(checksum, checksum2); + } + { + //cpuTimer.start("CPU GridChecksum of level set sphere"); + nanovdb::GridChecksum cs; + cs(*grid); + checksum2 = cs.checksum(0);// only check the checksum of grid, tree and root data + //cpuTimer.stop(); + //std::cerr << checksum2 << std::endl; + EXPECT_EQ(checksum, checksum2); + } + uint64_t fullChecksum; + { + //cpuTimer.start("CPU FULL cudaGridChecksum tabled CRC of level set sphere"); + nanovdb::updateChecksum(*handle.grid(), nanovdb::ChecksumMode::Full); + //cpuTimer.stop(); + fullChecksum = handle.grid()->checksum(); + EXPECT_EQ(checksum, 
fullChecksum & 0xFFFFFFFF); + } + { + //gpuTimer.start("GPU FULL cudaGridChecksum tabled CRC of level set sphere"); + nanovdb::cudaGridChecksum(handle.deviceGrid(), nanovdb::ChecksumMode::Full); + //gpuTimer.stop(); + uint64_t fullChecksum2; + cudaCheck(cudaMemcpy(&fullChecksum2, (const uint8_t*)handle.deviceGrid() + 8, 8, cudaMemcpyDeviceToHost)); + EXPECT_EQ(checksum, fullChecksum2 & 0xFFFFFFFF); + EXPECT_EQ(fullChecksum, fullChecksum2); + } + cudaCheck(cudaFree(d_checksum)); +}// CudaGridChecksum + +template +size_t countActiveVoxels(const nanovdb::NodeManager *d_mgr) +{ + size_t count[2], *d_count; + cudaCheck(cudaMalloc((void**)&d_count, 2*sizeof(size_t))); + cudaLambdaKernel<<<1,1>>>(1, [=] __device__ (size_t){ + d_count[0] = 0; + for (int i=0; ileafCount(); ++i) d_count[0] += d_mgr->leaf(i).valueMask().countOn(); + for (int i=0; ilowerCount(); ++i) d_count[0] += d_mgr->lower(i).valueMask().countOn(); + for (int i=0; iupperCount(); ++i) d_count[0] += d_mgr->upper(i).valueMask().countOn(); + d_count[1] = d_mgr->tree().activeVoxelCount(); + //printf("active count = %lu %lu\n", d_count[0], d_count[1]); + }); + cudaCheck(cudaMemcpy(count, d_count, 2*sizeof(size_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_count)); + EXPECT_EQ(count[0], count[1]); + return count[0]; +} +TEST(TestNanoVDBCUDA, NodeManager) +{ + auto handle = nanovdb::createLevelSetSphere(100); + EXPECT_TRUE(handle.data()); + auto *grid = handle.grid(); + EXPECT_TRUE(grid); + handle.deviceUpload(); + auto *d_grid = handle.deviceGrid(); + EXPECT_TRUE(d_grid); + size_t count = 0; + nanovdb::CpuTimer cpuTimer; + { + //cpuTimer.start("CPU NodeManager"); + auto handle2 = nanovdb::createNodeManager<>(*grid); + //cpuTimer.stop(); + auto *mgr = handle2.mgr(); + EXPECT_TRUE(mgr); + count = mgr->grid().tree().activeVoxelCount(); + } + + nanovdb::GpuTimer gpuTimer; + { + //gpuTimer.start("GPU NodeManager"); + auto handle2 = nanovdb::cudaCreateNodeManager(d_grid); + //gpuTimer.stop(); + auto *d_mgr = 
handle2.deviceMgr(); + EXPECT_TRUE(d_mgr); + EXPECT_EQ(count, countActiveVoxels(d_mgr)); + } +}// NodeManager diff --git a/nanovdb/nanovdb/unittest/TestOpenVDB.cc b/nanovdb/nanovdb/unittest/TestOpenVDB.cc index a199a64fe6..e14792cb81 100644 --- a/nanovdb/nanovdb/unittest/TestOpenVDB.cc +++ b/nanovdb/nanovdb/unittest/TestOpenVDB.cc @@ -1629,10 +1629,6 @@ TEST_F(TestOpenVDB, NanoToOpenVDB) TEST_F(TestOpenVDB, File) { - { // check nanovdb::io::stringHash - EXPECT_EQ(nanovdb::io::stringHash("generated_id_0"), nanovdb::io::stringHash("generated_id_0")); - EXPECT_NE(nanovdb::io::stringHash("generated_id_0"), nanovdb::io::stringHash("generated_id_1")); - } auto srcGrid = this->getSrcGrid(); //mTimer.start("Reading NanoVDB grids from file"); diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h index 2500d80722..2d578222d0 100644 --- a/nanovdb/nanovdb/util/CreateNanoGrid.h +++ b/nanovdb/nanovdb/util/CreateNanoGrid.h @@ -94,7 +94,6 @@ #include "DitherLUT.h"// for nanovdb::DitherLUT #include -#include // for stringstream #include #include #include // for memcpy @@ -1719,14 +1718,17 @@ void CreateNanoGrid::processGrid() auto* dstData = this->template dstGrid()->data(); dstData->init({GridFlags::IsBreadthFirst}, mOffset.size, mSrcNodeAcc.map(), mapToGridType(), mapToGridClass(mSrcNodeAcc.gridClass())); - dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); - dstData->mData1 = this->valueCount(); + dstData->mBlindMetadataCount = static_cast(mBlindMetaData.size()); + dstData->mData1 = this->valueCount(); if (!isValid(dstData->mGridType, dstData->mGridClass)) { - std::stringstream ss; - ss << "Invalid combination of GridType("<mGridType) - << ") and GridClass("<mGridClass)<<"). See NanoVDB.h for details!"; - throw std::runtime_error(ss.str()); +#if 1 + fprintf(stderr,"Warning: Strange combination of GridType(\"%s\") and GridClass(\"%s\"). 
Consider changing GridClass to \"Unknown\"\n", + toStr(dstData->mGridType), toStr(dstData->mGridClass)); +#else + throw std::runtime_error("Invalid combination of GridType("+std::to_string(int(dstData->mGridType))+ + ") and GridClass("+std::to_string(int(dstData->mGridClass))+"). See NanoVDB.h for details!"); +#endif } std::memset(dstData->mGridName, '\0', GridData::MaxNameSize);//overwrite mGridName @@ -1806,7 +1808,7 @@ CreateNanoGrid::postProcess() (void)metaData; } #endif - updateChecksum(*(this->template dstGrid()), mChecksum); + updateChecksum(*dstGrid, mChecksum); }// CreateNanoGrid::postProcess //================================================================================================ @@ -1818,7 +1820,7 @@ CreateNanoGrid::postProcess(uint32_t channels) { const std::string typeName = toStr(mapToGridType()); const uint64_t valueCount = this->valueCount(); - const auto *dstGrid = this->template dstGrid(); + auto *dstGrid = this->template dstGrid(); for (uint32_t i=0; ifindBlindData(name.c_str()); @@ -1837,7 +1839,7 @@ CreateNanoGrid::postProcess(uint32_t channels) } }// loop over channels gridStats(*(this->template dstGrid()), std::min(StatsMode::BBox, mStats)); - updateChecksum(*(this->template dstGrid()), mChecksum); + updateChecksum(*dstGrid, mChecksum); }// CreateNanoGrid::postProcess //================================================================================================ diff --git a/nanovdb/nanovdb/util/GridChecksum.h b/nanovdb/nanovdb/util/GridChecksum.h index 12b79a1131..35697afd1c 100644 --- a/nanovdb/nanovdb/util/GridChecksum.h +++ b/nanovdb/nanovdb/util/GridChecksum.h @@ -6,7 +6,7 @@ \author Ken Museth - \brief Computes a pair of 32bit checksums, og a Grid, by means of Cyclic Redundancy Check (CRC) + \brief Computes a pair of 32bit checksums, of a Grid, by means of Cyclic Redundancy Check (CRC) \details A CRC32 is the 32 bit remainder, or residue, of binary division of a message, by a polynomial. 
*/ @@ -21,12 +21,17 @@ #include // offsetof macro #include #include +#include // for std::unique_ptr #include -#include "GridHandle.h" #include "ForEach.h" #include "NodeManager.h" +// Define log of block size for FULL CRC32 computation. +// A value of 12 corresponds to a block size of 4KB (2^12 = 4096). +// Undefine to use old checksum computation +#define NANOVDB_CRC32_LOG2_BLOCK_SIZE 12 + namespace nanovdb { /// @brief List of different modes for computing for a checksum @@ -36,234 +41,295 @@ enum class ChecksumMode : uint32_t { Disable = 0,// no computation Default = 1,// defaults to Partial End = 3 };// marks the end of the enum list -/// @brief Return the (2 x CRC32) checksum of the specified @a grid -/// +/// @brief Return the (2 x CRC32) checksum of the specified @a grid +/// @tparam BuildT Template parameter used to build NanoVDB grid. /// @param grid Grid from which the checksum is computed. /// @param mode Defines the mode of computation for the checksum. -template -uint64_t checksum(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); +/// @return Return the (2 x CRC32) checksum of the specified @a grid +template +uint64_t checksum(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); /// @brief Return true if the checksum of the @a grid matches the expected /// value already encoded into the grid's meta data. -/// +/// @tparam BuildT Template parameter used to build NanoVDB grid. /// @param grid Grid whose checksum is validated. /// @param mode Defines the mode of computation for the checksum. -template -bool validateChecksum(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); +template +bool validateChecksum(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); /// @brief Updates the checksum of a grid /// /// @param grid Grid whose checksum will be updated. /// @param mode Defines the mode of computation for the checksum. 
-template -void updateChecksum(NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); +template +void updateChecksum(NanoGrid &grid, ChecksumMode mode = ChecksumMode::Default); -/// @brief Updates the checksum of the grids encapsulated by a handle -// -/// @param handle Handle with grids whose checksum will be updated. -/// @param mode Defines the mode of computation for the checksum. -template -void updateChecksum(GridHandle &handle, ChecksumMode mode = ChecksumMode::Default); - -/// @brief Return the CRC32 checksum of the raw @a data of @a size -/// @param data The beginning of the raw data. -/// @param size Size of the data to bytes! -inline std::uint_fast32_t crc32(const void *data, size_t size); - -/// @brief Return the CRC32 checksum of the content pointed to be the iterator -/// @param begin Beginning of the iterator range -/// @param end End of the iterator range (exclusive) -/// @warning The dereference of the iterator must be convertible to a uint8_t -template -std::uint_fast32_t crc32(IterT begin, IterT end); - -/// @brief Class that computes the Cyclic Redundancy Check (CRC) -class CRC32 -{ - using ByteT = std::uint_fast8_t; - using HashT = std::uint_fast32_t; - HashT mChecksum; +namespace crc32 { - static std::array INIT() - { - HashT n = 0; - auto kernel = [&n]()->HashT{ - HashT checksum = n++; - for (int i = 0; i < 8; ++i) checksum = (checksum >> 1) ^ ((checksum & 0x1u) ? 
HashT{0xEDB88320uL} : 0); - return checksum; - }; - std::array LUT{}; - std::generate(LUT.begin(), LUT.end(), kernel); - return LUT;// move semantic should prevent a deep copy - } - -public: - - static const HashT EMPTY = ~HashT{0} & HashT{0xFFFFFFFFuL};// All bits are on - - CRC32() : mChecksum(EMPTY) {} - - void reset() { mChecksum = EMPTY; } +/// @brief Initiate single entry in look-up-table for CRC32 computations +/// @param lut pointer of size 256 for look-up-table +/// @param n entry in table (assumed n < 256) +inline __hostdev__ void initLut(uint32_t lut[256], uint32_t n) +{ + uint32_t &cs = lut[n] = n; + for (int i = 0; i < 8; ++i) cs = (cs >> 1) ^ ((cs & 1) ? 0xEDB88320 : 0); +} - HashT checksum() const { return HashT{0xFFFFFFFFuL} & ~mChecksum; } +/// @brief Initiate entire look-up-table for CRC32 computations +/// @param lut pointer of size 256 for look-up-table +inline __hostdev__ void initLut(uint32_t lut[256]){for (uint32_t n = 0u; n < 256u; ++n) initLut(lut, n);} - template - void operator()(IterT begin, IterT end) - { - static const auto LUT = INIT();// scoped static initialization is thread-safe since C++11 - auto kernel = [](HashT checksum, ByteT value){return LUT[(checksum ^ value) & 0xFFu] ^ (checksum >> 8);}; - mChecksum = std::accumulate(begin, end, mChecksum, kernel); - } +/// @brief Create and initiate entire look-up-table for CRC32 computations +/// @return returns a unique pointer to the lookup table of size 256. 
+inline std::unique_ptr createLut() +{ + std::unique_ptr lut(new uint32_t[256]); + initLut(lut.get()); + return lut; +} - void operator()(const void *data, size_t byteSize) - { - const ByteT *begin = static_cast(data); - this->operator()(begin, begin + byteSize); +/// @brief Compute crc32 checksum of @c data of @c size bytes (without a lookup table)) +/// @param data pointer to beginning of data +/// @param size byte size of data +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum of @c data +inline __hostdev__ uint32_t checksum(const void* data, size_t size, uint32_t crc = 0) +{ + crc = ~crc; + for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) { + crc ^= *p; + for (int j = 0; j < 8; ++j) crc = (crc >> 1) ^ (0xEDB88320 & (-(crc & 1))); } + return ~crc; +} - template - void operator()(const T &data) {(*this)(&data, sizeof(T));} -};// CRC32 +/// @brief Compute crc32 checksum of data between @c begin and @c end +/// @param begin points to beginning of @data +/// @param end points to end of @data, (exclusive) +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum +inline __hostdev__ uint32_t checksum(const void *begin, const void *end, uint32_t crc = 0) +{ + NANOVDB_ASSERT(begin && end); + NANOVDB_ASSERT(end >= begin); + return checksum(begin, (const char*)end - (const char*)begin, crc); +} -inline std::uint_fast32_t crc32(const void *data, size_t byteSize) +/// @brief +/// @param data +/// @param size +/// @param lut +/// @param crc +/// @return +inline __hostdev__ uint32_t checksum(const void *data, size_t size, const uint32_t lut[256], uint32_t crc = 0) { - CRC32 crc; - crc(data, byteSize); - return crc.checksum(); + crc = ~crc; + for (auto *p = (const uint8_t*)data, *q = p + size; p != q; ++p) crc = lut[(crc ^ *p) & 0xFF] ^ (crc >> 8); + return ~crc; } -template -inline std::uint_fast32_t crc32(IterT begin, IterT end) +/// @brief +/// @param begin +/// @param end +/// @param lut +/// @param 
crc +/// @return +inline __hostdev__ uint32_t checksum(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0) { - CRC32 crc; - crc(begin, end); - return crc.checksum(); + NANOVDB_ASSERT(begin && end); + NANOVDB_ASSERT(end >= begin); + return checksum(begin, (const char*)end - (const char*)begin, lut, crc); } +}// namespace crc32 + /// @brief Class that encapsulates two CRC32 checksums, one for the Grid, Tree and Root node meta data /// and one for the remaining grid nodes. class GridChecksum { - union {uint32_t mCRC[2]; uint64_t mChecksum; }; + /// Three types of checksums: + /// 1) Empty: all 64 bits are on (used to signify no checksum) + /// 2) Partial: Upper 32 bits are on and not all of lower 32 bits are on (lower 32 bits checksum head of grid) + /// 3) Full: Not all of the 64 bits are one (lower 32 bits checksum head of grid and upper 32 bits checksum tail of grid) + union {uint32_t mCRC[2]; uint64_t mChecksum; };// mCRC[0] is checksum of Grid, Tree and Root, and mCRC[1] is checksum of nodes + static constexpr uint32_t EMPTY32 = ~uint32_t{0}; public: - static const uint64_t EMPTY = (static_cast(CRC32::EMPTY) << 32) | static_cast(CRC32::EMPTY); + static constexpr uint64_t EMPTY = ~uint64_t(0); - GridChecksum() : mCRC{CRC32::EMPTY, CRC32::EMPTY} {} + /// @brief default constructor initiates checksum to EMPTY + GridChecksum() : mCRC{EMPTY32, EMPTY32} {} + /// @brief Constructor that allows the two 32bit checksums to be initiated explicitly + /// @param head Initial 32bit CRC checksum of grid, tree and root data + /// @param tail Initial 32bit CRC checksum of all the nodes and blind data GridChecksum(uint32_t head, uint32_t tail) : mCRC{head, tail} {} + /// @brief + /// @param checksum + /// @param mode GridChecksum(uint64_t checksum, ChecksumMode mode = ChecksumMode::Full) : mChecksum{mode == ChecksumMode::Disable ? 
EMPTY : checksum} { - if (mode == ChecksumMode::Partial) mCRC[1] = CRC32::EMPTY; + if (mode == ChecksumMode::Partial) mCRC[1] = EMPTY32; } + /// @brief + /// @return uint64_t checksum() const { return mChecksum; } - uint32_t crc32(int i) const {assert(i==0 || i==1); return mCRC[i]; } + /// @brief + /// @param i + /// @return + uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } + + /// @brief + /// @param i + /// @return + uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } - bool isFull() const { return mCRC[0] != CRC32::EMPTY && mCRC[1] != CRC32::EMPTY; } + /// @brief + /// @return + bool isPartial() const { return mCRC[0] != EMPTY32 && mCRC[1] == EMPTY32; } + /// @brief + /// @return + bool isFull() const { return mCRC[0] != EMPTY32 && mCRC[1] != EMPTY32; } + + /// @brief + /// @return bool isEmpty() const { return mChecksum == EMPTY; } + /// @brief + /// @return ChecksumMode mode() const { return mChecksum == EMPTY ? ChecksumMode::Disable : - mCRC[1] == CRC32::EMPTY ? ChecksumMode::Partial : ChecksumMode::Full; + mCRC[1] == EMPTY32 ? ChecksumMode::Partial : ChecksumMode::Full; } - +#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE + /// @brief + /// @param gridData + /// @param mode + ChecksumMode operator()(const GridData &gridData, ChecksumMode mode = ChecksumMode::Full); +#else + /// @brief + /// @tparam ValueT + /// @param grid + /// @param mode template void operator()(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Full); - +#endif + /// @brief + /// @param rhs + /// @return bool operator==(const GridChecksum &rhs) const {return mChecksum == rhs.mChecksum;} + + /// @brief + /// @param rhs + /// @return bool operator!=(const GridChecksum &rhs) const {return mChecksum != rhs.mChecksum;} };// GridChecksum // [GridData][TreeData]---[RootData][ROOT TILES...]---[NodeData<5>]---[NodeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. 
+ +#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE + +inline ChecksumMode GridChecksum::operator()(const GridData &gridData, ChecksumMode mode) +{ + mChecksum = EMPTY; + + if (mode == ChecksumMode::Disable) return ChecksumMode::Disable; + + auto lut = crc32::createLut(); + const uint8_t *begin = (const uint8_t*)(&gridData), *mid = gridData.template nodePtr<2>(), *end = begin + gridData.mGridSize;// what about empty grids? + if (mid == nullptr) {// no (upper) nodes + if (gridData.mBlindMetadataCount) { + mid = begin + gridData.mBlindMetadataOffset;// exclude blind data from Partial checksum + } else { + mid = end;// no nodes or blind data, so Partial checksum is computed on the entire grid buffer + } + } + mCRC[0] = crc32::checksum(begin + 16, mid, lut.get());// GridData, TreeData. RootData but exclude GridData::mMagic and GridData::mChecksum + + if (mode != ChecksumMode::Full || mid == end) return ChecksumMode::Partial; + + uint64_t size = end - mid;// includes blind data + const uint64_t blockCount = size >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// number of 4 KB (4096 byte) blocks + std::unique_ptr checksums(new uint32_t[blockCount]); + forEach(0, blockCount, 64, [&](const Range1D &r) { + uint32_t blockSize = 1 << NANOVDB_CRC32_LOG2_BLOCK_SIZE; + uint32_t *p = checksums.get() + r.begin(); + for (auto i = r.begin(); i != r.end(); ++i) { + if (i+1 == blockCount) blockSize += size - (blockCount< void GridChecksum::operator()(const NanoGrid &grid, ChecksumMode mode) { // Validate the assumed memory layout -#if 0 - NANOVDB_ASSERT(NANOVDB_OFFSETOF(GridData, mMagic) == 0); - NANOVDB_ASSERT(NANOVDB_OFFSETOF(GridData, mChecksum) == 8); - NANOVDB_ASSERT(NANOVDB_OFFSETOF(GridData, mVersion) == 16); -#else// the static asserts below generate compiler warnings static_assert(offsetof(GridData, mMagic) == 0, "Unexpected offset to magic number"); static_assert(offsetof(GridData, mChecksum) == 8, "Unexpected offset to checksum"); static_assert(offsetof(GridData, mVersion) == 16, "Unexpected offset 
to version number"); -#endif - static const size_t offset = 16; mChecksum = EMPTY; if (mode == ChecksumMode::Disable) return; - const auto &tree = grid.tree(); - const auto &root = tree.root(); - CRC32 crc; + auto lut = crc32::createLut(); + const uint8_t *begin = reinterpret_cast(&grid), *mid = grid.template nodePtr<2>(); - // process Grid + Tree + Root but exclude mMagic and mChecksum - const uint8_t *begin = reinterpret_cast(&grid); - const uint8_t *end = begin + grid.memUsage() + tree.memUsage() + root.memUsage(); - crc(begin + offset, end); + mCRC[0] = crc32::checksum(begin + 16, mid, lut.get());// process Grid + Tree + Root but exclude mMagic and mChecksum - mCRC[0] = crc.checksum(); - - if (mode == ChecksumMode::Partial || tree.isEmpty()) return; + if (mode != ChecksumMode::Full || grid.isEmpty()) return; + const auto &tree = grid.tree(); + const auto &root = tree.root(); auto nodeMgrHandle = createNodeManager(grid); auto *nodeMgr = nodeMgrHandle.template mgr(); assert(isValid(nodeMgr)); const auto nodeCount = tree.nodeCount(0) + tree.nodeCount(1) + tree.nodeCount(2); - std::vector checksums(nodeCount, 0); - + std::vector checksums(nodeCount, 0); // process upper internal nodes auto kernel2 = [&](const Range1D &r) { - CRC32 local; - std::uint_fast32_t *p = checksums.data() + r.begin(); + uint32_t *p = checksums.data() + r.begin(); for (auto i = r.begin(); i != r.end(); ++i) { const auto &node = nodeMgr->upper(static_cast(i)); - local(node); - *p++ = local.checksum(); - local.reset(); + *p++ = crc32::checksum(&node, node.memUsage(), lut.get()); } }; - // process lower internal nodes auto kernel1 = [&](const Range1D &r) { - CRC32 local; - std::uint_fast32_t *p = checksums.data() + r.begin() + tree.nodeCount(2); + uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(2); for (auto i = r.begin(); i != r.end(); ++i) { const auto &node = nodeMgr->lower(static_cast(i)); - local(node); - *p++ = local.checksum(); - local.reset(); + *p++ = crc32::checksum(&node, 
node.memUsage(), lut.get()); } }; - // process leaf nodes auto kernel0 = [&](const Range1D &r) { - CRC32 local; - std::uint_fast32_t *p = checksums.data() + r.begin() + tree.nodeCount(1) + tree.nodeCount(2); + uint32_t *p = checksums.data() + r.begin() + tree.nodeCount(1) + tree.nodeCount(2); for (auto i = r.begin(); i != r.end(); ++i) { const auto &leaf = nodeMgr->leaf(static_cast(i)); - local(leaf); - *p++ = local.checksum(); - local.reset(); + *p++ = crc32::checksum(&leaf, leaf.memUsage(), lut.get()); } }; - forEach(0, tree.nodeCount(2), 1, kernel2); forEach(0, tree.nodeCount(1), 1, kernel1); forEach(0, tree.nodeCount(0), 8, kernel0); - - crc.reset(); - crc(checksums.data(), sizeof(std::uint_fast32_t)*checksums.size() ); - mCRC[1] = crc.checksum(); + mCRC[1] = crc32::checksum(checksums.data(), sizeof(uint32_t)*checksums.size(), lut.get()); }// GridChecksum::operator() +#endif// NANOVDB_CRC32_LOG2_BLOCK_SIZE + template uint64_t checksum(const NanoGrid &grid, ChecksumMode mode) { @@ -288,85 +354,114 @@ void updateChecksum(NanoGrid &grid, ChecksumMode mode) grid.data()->mChecksum = cs.checksum(); } -template -void updateChecksum(GridHandle &handle, ChecksumMode mode) +inline bool updateChecksum(GridData &gridData, ChecksumMode mode) { - for (uint32_t i = 0; i < handle.gridCount(); ++i) - { - const GridType& gridType = handle.gridType(i); - switch (gridType) - { - case GridType::Float: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Double: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Int16: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Int32: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Int64: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Vec3f: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Vec3d: - updateChecksum(*handle.template grid(i)); - break; - case GridType::UInt32: - updateChecksum(*handle.template grid(i)); - 
break; - case GridType::Mask: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Index: - updateChecksum(*handle.template grid(i)); - break; - case GridType::OnIndex: - updateChecksum(*handle.template grid(i)); - break; - case GridType::IndexMask: - updateChecksum(*handle.template grid(i)); - break; - case GridType::OnIndexMask: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Boolean: - updateChecksum(*handle.template grid(i)); - break; - case GridType::RGBA8: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Fp4: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Fp8: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Fp16: - updateChecksum(*handle.template grid(i)); - break; - case GridType::FpN: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Vec4f: - updateChecksum(*handle.template grid(i)); - break; - case GridType::Vec4d: - updateChecksum(*handle.template grid(i)); - break; - default: - { - std::stringstream ss; - ss << "Cannot update checksum for grid of unknown type \"" << toStr(handle.gridType(i)); - throw std::runtime_error(ss.str() + "\""); - } +#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE + GridChecksum cs; + cs(gridData, mode); + gridData.mChecksum = cs.checksum(); +#else + if (mode == ChecksumMode::Disable) return false; + switch (data->mGridType){ + case GridType::Float: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Double: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Int16: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Int32: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Int64: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Vec3f: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Vec3d: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case 
GridType::UInt32: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Mask: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Index: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::OnIndex: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::IndexMask: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::OnIndexMask: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Boolean: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::RGBA8: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Fp4: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Fp8: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Fp16: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::FpN: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Vec4f: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + case GridType::Vec4d: + updateChecksum(*reinterpret_cast*>(data), mode); + break; + default: { + std::stringstream ss; + ss << "Cannot update checksum for grid of unknown type \"" << toStr(data->mGridType); + throw std::runtime_error(ss.str() + "\""); } - } + }// switch +#endif + return true; +}// updateChecksum(GridData *data, ChecksumMode mode) + +/// @brief Preserve the existing mode of the checksum and update it if it's not disabled +/// @param data +/// @return +inline bool updateChecksum(GridData *data) +{ + GridChecksum cs(data->mChecksum); + const auto mode = cs.mode(); + return updateChecksum(*data, mode); +}// updateChecksum(GridData *data) + +/// @brief Updates the ground index and count, as well as the partial checksum if needed +/// @param data Pointer to grid data +/// @param gridIndex New value of the index +/// @param gridCount New value of the grid count +/// @return returns 
true if the checksum was updated +inline bool updateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount) +{ + NANOVDB_ASSERT(gridIndex < gridCount); + if (data->mGridIndex == gridIndex && data->mGridCount == gridCount) return false;// nothing to update + data->mGridIndex = gridIndex; + data->mGridCount = gridCount; + GridChecksum cs(data->mChecksum); + if (cs.isEmpty()) return false;// no checksum to update + updateChecksum(*data, ChecksumMode::Partial);// only update the checksum of the grid since we only modified the GridData + reinterpret_cast(&(data->mChecksum))->checksum(1) = cs.checksum(1);// copy the old checksum of the tree nodes since it was set to EMPTY during the update + return true; } } // namespace nanovdb diff --git a/nanovdb/nanovdb/util/GridHandle.h b/nanovdb/nanovdb/util/GridHandle.h index 20dde83535..4ccd019787 100644 --- a/nanovdb/nanovdb/util/GridHandle.h +++ b/nanovdb/nanovdb/util/GridHandle.h @@ -15,10 +15,14 @@ #ifndef NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED #define NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED +#include // for std::ifstream +#include // for std::cerr/cout #include #include + #include // for mapToGridType #include +#include // for updateGridCount namespace nanovdb { @@ -114,7 +118,7 @@ class GridHandle //@{ /// @brief Return true if this handle is empty, i.e. has no allocated memory - bool empty() const { return this->size() == 0; } + bool empty() const { return this->size() == 0; } bool isEmpty() const { return this->size() == 0; } //@} @@ -173,7 +177,7 @@ class GridHandle bool isPadded() const {return mMetaData.empty() ? 
false : mMetaData.back().offset + mMetaData.back().size != mBuffer.size();} /// @brief Return the total number of grids contained in this buffer - uint32_t gridCount() const {return mMetaData.size();} + uint32_t gridCount() const {return static_cast(mMetaData.size());} /// @brief Return the grid size of the @a n'th grid in this GridHandle /// @param n index of the grid (assumed to be less than gridCount()) @@ -194,6 +198,93 @@ class GridHandle /// @param n zero-based ID of the grid /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized const GridMetaData* gridMetaData(uint32_t n = 0) const; + + /// @brief Write a specific grid in this buffer to an output stream + /// @param os output stream that the buffer will be written to + /// @param n zero-based index of the grid to be written to stream + void write(std::ostream& os, uint32_t n) const { + if (const GridData* data = this->gridData(n)) { + os.write((const char*)data, data->mGridSize); + } else { + throw std::runtime_error("GridHandle does not contain a #" + std::to_string(n) + " grid"); + } + } + + /// @brief Write the entire grid buffer to an output stream + /// @param os output stream that the buffer will be written to + void write(std::ostream& os) const { + for (uint32_t n=0; ngridCount(); ++n) this->write(os, n); + } + + /// @brief Write this entire grid buffer to a file + /// @param fileName string name of the output file + void write(const std::string &fileName) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os); + } + + /// @brief Write a specific grid to file + /// @param fileName string name of the output file + /// @param n zero-based index of the grid to be written to file + void write(const std::string &fileName, uint32_t n) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | 
std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os, n); + } + + /// @brief Read an entire raw grid buffer from an input stream + /// @param is input stream containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, const BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, uint32_t n, const BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid with the speficied name + void read(std::istream& is, const std::string &gridName, const BufferT& pool = BufferT()); + + /// @brief Read a raw grid buffer from a file + /// @param filename string name of the input file containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffe + void read(const std::string &fileName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + 
/// @param filename string name of the input file containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the files does not contain a valid raw grid + void read(const std::string &fileName, uint32_t n, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, n, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + /// @param filename string name of the input file containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the files does not contain a valid raw grid withe the specified name + void read(const std::string &fileName, const std::string &gridName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, gridName, pool); + } }; // GridHandle // --------------------------> Implementation of private methods in GridHandle <------------------------------------ @@ -204,7 +295,7 @@ inline const GridData* GridHandle::gridData(uint32_t n) const const uint8_t *data = this->data(); if (data == nullptr || n >= mMetaData.size()) return nullptr; return reinterpret_cast(data + mMetaData[n].offset); -} +}// const GridData* GridHandle::gridData(uint32_t n) const template inline const GridMetaData* GridHandle::gridMetaData(uint32_t n) const @@ -212,7 +303,7 @@ inline const GridMetaData* 
GridHandle::gridMetaData(uint32_t n) const const uint8_t *data = this->data(); if (data == nullptr || n >= mMetaData.size()) return nullptr; return reinterpret_cast(data + mMetaData[n].offset); -} +}// const GridMetaData* GridHandle::gridMetaData(uint32_t n) const namespace {// anonymous namespace inline __hostdev__ void cpyMetaData(const GridData *data, GridHandleMetaData *meta) @@ -223,7 +314,7 @@ inline __hostdev__ void cpyMetaData(const GridData *data, GridHandleMetaData *me offset += p->size; data = PtrAdd(data, p->size); } -} +}// void cpyMetaData(const GridData *data, GridHandleMetaData *meta) }// anonymous namespace template @@ -237,7 +328,7 @@ GridHandle::GridHandle(T&& buffer) mMetaData.resize(data->mGridCount); cpyMetaData(data, mMetaData.data()); } -} +}// GridHandle::GridHandle(T&& buffer) template template @@ -247,7 +338,7 @@ inline GridHandle GridHandle::copy(const OtherBufferT& ot auto buffer = OtherBufferT::create(mBuffer.size(), &other); std::memcpy(buffer.data(), mBuffer.data(), mBuffer.size());// deep copy of buffer return GridHandle(std::move(buffer)); -} +}// GridHandle GridHandle::copy(const OtherBufferT& other) const template template @@ -256,7 +347,7 @@ inline const NanoGrid* GridHandle::grid(uint32_t n) const const uint8_t *data = mBuffer.data(); if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; return reinterpret_cast*>(data + mMetaData[n].offset); -} +}// const NanoGrid* GridHandle::grid(uint32_t n) const template template @@ -266,18 +357,83 @@ GridHandle::deviceGrid(uint32_t n) const const uint8_t *data = mBuffer.deviceData(); if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != mapToGridType()) return nullptr; return reinterpret_cast*>(data + mMetaData[n].offset); -} +}// GridHandle::deviceGrid(uint32_t n) cons -// --------------------------> free-standing functions <------------------------------------ +template +void GridHandle::read(std::istream& is, const 
BufferT& pool) +{ + GridData data; + is.read((char*)&data, 40);// only 40 bytes are required for all the data we need in GridData + if (data.isValid()) { + uint64_t size = data.mGridSize, sum = 0u; + while(data.mGridIndex + 1u < data.mGridCount) {// loop over remaining raw grids in stream + is.seekg(data.mGridSize - 40, std::ios::cur);// skip grid + is.read((char*)&data, 40);// read 40 bytes of the next GridData + sum += data.mGridSize; + } + is.seekg(-int64_t(sum + 40), std::ios::cur);// rewind to start + auto buffer = BufferT::create(size + sum, &pool); + is.read((char*)(buffer.data()), buffer.size()); + *this = GridHandle(std::move(buffer)); + } else { + is.seekg(-40, std::ios::cur);// rewind + throw std::logic_error("This stream does not contain a valid raw grid buffer"); + } +}// void GridHandle::read(std::istream& is, const BufferT& pool) -namespace {// anonymous namespace -inline __hostdev__ void updateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount) +template +void GridHandle::read(std::istream& is, uint32_t n, const BufferT& pool) { - data->mGridIndex = gridIndex; - data->mGridCount = gridCount; -} -}// anonymous namespace + GridData data; + is.read((char*)&data, 40);// only 40 bytes are required for all the data we need in GridData + if (data.isValid()) { + if (n>=data.mGridCount) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); + while(data.mGridIndex != n) { + is.seekg(data.mGridSize - 40, std::ios::cur);// skip grid + is.read((char*)&data, 40);// read 40 bytes + } + auto buffer = BufferT::create(data.mGridSize, &pool); + is.seekg(-40, std::ios::cur);// rewind + is.read((char*)(buffer.data()), data.mGridSize); + updateGridCount((GridData*)buffer.data(), 0u, 1u); + *this = GridHandle(std::move(buffer)); + } else { + is.seekg(-40, std::ios::cur);// rewind 40 bytes to undo initial read + throw std::logic_error("This file does not contain a valid raw buffer"); + } +}// void GridHandle::read(std::istream& 
is, uint32_t n, const BufferT& pool) + +template +void GridHandle::read(std::istream& is, const std::string &gridName, const BufferT& pool) +{ + static const std::streamsize byteSize = sizeof(GridData); + GridData data; + is.read((char*)&data, byteSize); + is.seekg(-byteSize, std::ios::cur);// rewind + if (data.isValid()) { + uint32_t n = 0; + while(data.mGridName != gridName && n++ < data.mGridCount) { + is.seekg(data.mGridSize, std::ios::cur);// skip grid + is.read((char*)&data, byteSize);// read 40 bytes + is.seekg(-byteSize, std::ios::cur);// rewind + } + if (n>data.mGridCount) throw std::runtime_error("No raw grid named \""+gridName+"\""); + auto buffer = BufferT::create(data.mGridSize, &pool); + is.read((char*)(buffer.data()), data.mGridSize); + updateGridCount((GridData*)buffer.data(), 0u, 1u); + *this = GridHandle(std::move(buffer)); + } else { + throw std::logic_error("This file does not contain a valid raw buffer"); + } +}// void GridHandle::read(std::istream& is, const std::string &gridName n, const BufferT& pool) + +// --------------------------> free-standing functions <------------------------------------ +/// @brief Split all grids in a single GridHandle into a vector of multiple GridHandles each with a single grid +/// @tparam BufferT Type of the input and output grid buffers +/// @param handle GridHandle with grids that will be slip into individual GridHandles +/// @param pool optional pool used for allocation of output GridHandle +/// @return Vector of GridHandles each containing a single grid template class VectorT = std::vector> inline VectorT> splitGrids(const GridHandle &handle, const BufferT* other = nullptr) @@ -292,16 +448,21 @@ splitGrids(const GridHandle &handle, const BufferT* other = nullptr) auto buffer = BufferT::create(src->mGridSize, other); GridData *dst = reinterpret_cast(buffer.data()); std::memcpy(dst, src, src->mGridSize); - updateGridData(dst, 0u, 1u); + updateGridCount(dst, 0u, 1u); h = HandleT(std::move(buffer)); ptr += 
src->mGridSize; } return std::move(handles); }// splitGrids +/// @brief Combines (or merges) multiple GridHandles into a single GridHandle containing all grids +/// @tparam BufferT Type of the input and output grid buffers +/// @param handles Vector of GridHandles to be combined +/// @param pool optional pool used for allocation of output GridHandle +/// @return single GridHandle containing all input grids template class VectorT> inline GridHandle -mergeGrids(const VectorT> &handles, const BufferT* other = nullptr) +mergeGrids(const VectorT> &handles, const BufferT* pool = nullptr) { uint64_t size = 0u; uint32_t counter = 0u, gridCount = 0u; @@ -309,7 +470,7 @@ mergeGrids(const VectorT> &handles, const BufferT* other = n gridCount += h.gridCount(); for (uint32_t n=0; n> &handles, const BufferT* other = n std::memcpy(dst, src, h.gridSize(n)); GridData *data = reinterpret_cast(dst); NANOVDB_ASSERT(data->isValid()); - updateGridData(data, counter++, gridCount); + updateGridCount(data, counter++, gridCount); dst += data->mGridSize; src += data->mGridSize; } diff --git a/nanovdb/nanovdb/util/GridStats.h b/nanovdb/nanovdb/util/GridStats.h index df13104470..1c89cb98c1 100644 --- a/nanovdb/nanovdb/util/GridStats.h +++ b/nanovdb/nanovdb/util/GridStats.h @@ -23,6 +23,12 @@ #include #endif +#if defined(__CUDACC__) +#include // for cuda::std::numeric_limits +#else +#include // for std::numeric_limits +#endif + #include #include @@ -59,55 +65,61 @@ class Extrema public: using ValueType = ValueT; - Extrema() + __hostdev__ Extrema() +#if defined(__CUDACC__) + : mMin(cuda::std::numeric_limits::max()) + , mMax(cuda::std::numeric_limits::lowest()) +#else : mMin(std::numeric_limits::max()) , mMax(std::numeric_limits::lowest()) +#endif { } - Extrema(const ValueT& v) + __hostdev__ Extrema(const ValueT& v) : mMin(v) , mMax(v) { } - Extrema(const ValueT& a, const ValueT& b) + __hostdev__ Extrema(const ValueT& a, const ValueT& b) : mMin(a) , mMax(b) { } - Extrema& min(const ValueT& v) + 
__hostdev__ Extrema& min(const ValueT& v) { if (v < mMin) { mMin = v; } return *this; } - Extrema& max(const ValueT& v) + __hostdev__ Extrema& max(const ValueT& v) { if (v > mMax) { mMax = v; } return *this; } - Extrema& add(const ValueT& v) + __hostdev__ Extrema& add(const ValueT& v) { this->min(v); this->max(v); return *this; } - Extrema& add(const ValueT& v, uint64_t) { return this->add(v); } - Extrema& add(const Extrema& other) + __hostdev__ Extrema& add(const ValueT& v, uint64_t) { return this->add(v); } + __hostdev__ Extrema& add(const Extrema& other) { this->min(other.mMin); this->max(other.mMax); return *this; } - const ValueT& min() const { return mMin; } - const ValueT& max() const { return mMax; } - operator bool() const { return mMin <= mMax; } - static constexpr bool hasMinMax() { return !std::is_same::value; } - static constexpr bool hasAverage() { return false; } - static constexpr bool hasStdDeviation() { return false; } - static constexpr size_t size() { return 0; } + __hostdev__ const ValueT& min() const { return mMin; } + __hostdev__ const ValueT& max() const { return mMax; } + __hostdev__ operator bool() const { return mMin <= mMax; } + __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } + __hostdev__ static constexpr size_t size() { return 0; } }; // Extrema /// @brief Template specialization of Extrema on vector value types, i.e. 
rank = 1 @@ -121,19 +133,19 @@ class Extrema Real scalar; VecT vector; - Pair(Real s)// is only used by Extrema() default c-tor + __hostdev__ Pair(Real s)// is only used by Extrema() default c-tor : scalar(s) , vector(s) { } - Pair(const VecT& v) + __hostdev__ Pair(const VecT& v) : scalar(v.lengthSqr()) , vector(v) { } - bool operator<(const Pair& rhs) const { return scalar < rhs.scalar; } + __hostdev__ bool operator<(const Pair& rhs) const { return scalar < rhs.scalar; } } mMin, mMax; - Extrema& add(const Pair& p) + __hostdev__ Extrema& add(const Pair& p) { if (p < mMin) { mMin = p; @@ -146,22 +158,27 @@ class Extrema public: using ValueType = VecT; - Extrema() + __hostdev__ Extrema() +#if defined(__CUDACC__) + : mMin(cuda::std::numeric_limits::max()) + , mMax(cuda::std::numeric_limits::lowest()) +#else : mMin(std::numeric_limits::max()) , mMax(std::numeric_limits::lowest()) +#endif { } - Extrema(const VecT& v) + __hostdev__ Extrema(const VecT& v) : mMin(v) , mMax(v) { } - Extrema(const VecT& a, const VecT& b) + __hostdev__ Extrema(const VecT& a, const VecT& b) : mMin(a) , mMax(b) { } - Extrema& min(const VecT& v) + __hostdev__ Extrema& min(const VecT& v) { Pair tmp(v); if (tmp < mMin) { @@ -169,7 +186,7 @@ class Extrema } return *this; } - Extrema& max(const VecT& v) + __hostdev__ Extrema& max(const VecT& v) { Pair tmp(v); if (mMax < tmp) { @@ -177,9 +194,9 @@ class Extrema } return *this; } - Extrema& add(const VecT& v) { return this->add(Pair(v)); } - Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } - Extrema& add(const Extrema& other) + __hostdev__ Extrema& add(const VecT& v) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } + __hostdev__ Extrema& add(const Extrema& other) { if (other.mMin < mMin) { mMin = other.mMin; @@ -189,13 +206,14 @@ class Extrema } return *this; } - const VecT& min() const { return mMin.vector; } - const VecT& max() const { return mMax.vector; } - 
operator bool() const { return !(mMax < mMin); } - static constexpr bool hasMinMax() { return !std::is_same::value; } - static constexpr bool hasAverage() { return false; } - static constexpr bool hasStdDeviation() { return false; } - static constexpr size_t size() { return 0; } + __hostdev__ const VecT& min() const { return mMin.vector; } + __hostdev__ const VecT& max() const { return mMax.vector; } + __hostdev__ operator bool() const { return !(mMax < mMin); } + __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } + __hostdev__ static constexpr size_t size() { return 0; } }; // Extrema //================================================================================================ @@ -222,14 +240,14 @@ class Stats : public Extrema public: using ValueType = ValueT; - Stats() + __hostdev__ Stats() : BaseT() , mSize(0) , mAvg(0.0) , mAux(0.0) { } - Stats(const ValueT& val) + __hostdev__ Stats(const ValueT& val) : BaseT(val) , mSize(1) , mAvg(RealT(val)) @@ -237,7 +255,7 @@ class Stats : public Extrema { } /// @brief Add a single sample - Stats& add(const ValueT& val) + __hostdev__ Stats& add(const ValueT& val) { BaseT::add(val); mSize += 1; @@ -247,7 +265,7 @@ class Stats : public Extrema return *this; } /// @brief Add @a n samples with constant value @a val. - Stats& add(const ValueT& val, uint64_t n) + __hostdev__ Stats& add(const ValueT& val, uint64_t n) { const double denom = 1.0 / double(mSize + n); const double delta = double(val) - mAvg; @@ -259,7 +277,7 @@ class Stats : public Extrema } /// Add the samples from the other Stats instance. 
- Stats& add(const Stats& other) + __hostdev__ Stats& add(const Stats& other) { if (other.mSize > 0) { const double denom = 1.0 / double(mSize + other.mSize); @@ -272,31 +290,32 @@ class Stats : public Extrema return *this; } - static constexpr bool hasMinMax() { return !std::is_same::value; } - static constexpr bool hasAverage() { return !std::is_same::value; } - static constexpr bool hasStdDeviation() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - size_t size() const { return mSize; } + __hostdev__ size_t size() const { return mSize; } //@{ /// Return the arithmetic mean, i.e. average, value. - double avg() const { return mAvg; } - double mean() const { return mAvg; } + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } //@} //@{ /// @brief Return the population variance. /// /// @note The unbiased sample variance = population variance * num/(num-1) - double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } - double variance() const { return this->var(); } + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } //@} //@{ /// @brief Return the standard deviation (=Sqrt(variance)) as /// defined from the (biased) population variance. 
- double std() const { return sqrt(this->var()); } - double stdDev() const { return this->std(); } + __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } //@} }; // end Stats @@ -319,7 +338,7 @@ class Stats : public Extrema public: using ValueType = ValueT; - Stats() + __hostdev__ Stats() : BaseT() , mSize(0) , mAvg(0.0) @@ -327,7 +346,7 @@ class Stats : public Extrema { } /// @brief Add a single sample - Stats& add(const ValueT& val) + __hostdev__ Stats& add(const ValueT& val) { typename BaseT::Pair tmp(val); BaseT::add(tmp); @@ -338,7 +357,7 @@ class Stats : public Extrema return *this; } /// @brief Add @a n samples with constant value @a val. - Stats& add(const ValueT& val, uint64_t n) + __hostdev__ Stats& add(const ValueT& val, uint64_t n) { typename BaseT::Pair tmp(val); const double denom = 1.0 / double(mSize + n); @@ -351,7 +370,7 @@ class Stats : public Extrema } /// Add the samples from the other Stats instance. - Stats& add(const Stats& other) + __hostdev__ Stats& add(const Stats& other) { if (other.mSize > 0) { const double denom = 1.0 / double(mSize + other.mSize); @@ -364,31 +383,32 @@ class Stats : public Extrema return *this; } - static constexpr bool hasMinMax() { return !std::is_same::value; } - static constexpr bool hasAverage() { return !std::is_same::value; } - static constexpr bool hasStdDeviation() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasMinMax() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasAverage() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasStdDeviation() { return !std::is_same::value; } + __hostdev__ static constexpr bool hasStats() { return !std::is_same::value; } - size_t size() const { return mSize; } + __hostdev__ size_t size() const { return mSize; } //@{ /// Return the arithmetic mean, i.e. average, value. 
- double avg() const { return mAvg; } - double mean() const { return mAvg; } + __hostdev__ double avg() const { return mAvg; } + __hostdev__ double mean() const { return mAvg; } //@} //@{ /// @brief Return the population variance. /// /// @note The unbiased sample variance = population variance * num/(num-1) - double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } - double variance() const { return this->var(); } + __hostdev__ double var() const { return mSize < 2 ? 0.0 : mAux / double(mSize); } + __hostdev__ double variance() const { return this->var(); } //@} //@{ /// @brief Return the standard deviation (=Sqrt(variance)) as /// defined from the (biased) population variance. - double std() const { return sqrt(this->var()); } - double stdDev() const { return this->std(); } + __hostdev__ double std() const { return sqrt(this->var()); } + __hostdev__ double stdDev() const { return this->std(); } //@} }; // end Stats @@ -397,15 +417,16 @@ template struct NoopStats { using ValueType = ValueT; - NoopStats() {} - NoopStats(const ValueT&) {} - NoopStats& add(const ValueT&) { return *this; } - NoopStats& add(const ValueT&, uint64_t) { return *this; } - NoopStats& add(const NoopStats&) { return *this; } - static constexpr size_t size() { return 0; } - static constexpr bool hasMinMax() { return false; } - static constexpr bool hasAverage() { return false; } - static constexpr bool hasStdDeviation() { return false; } + __hostdev__ NoopStats() {} + __hostdev__ NoopStats(const ValueT&) {} + __hostdev__ NoopStats& add(const ValueT&) { return *this; } + __hostdev__ NoopStats& add(const ValueT&, uint64_t) { return *this; } + __hostdev__ NoopStats& add(const NoopStats&) { return *this; } + __hostdev__ static constexpr size_t size() { return 0; } + __hostdev__ static constexpr bool hasMinMax() { return false; } + __hostdev__ static constexpr bool hasAverage() { return false; } + __hostdev__ static constexpr bool hasStdDeviation() { return false; } + __hostdev__ 
static constexpr bool hasStats() { return false; } }; // end NoopStats //================================================================================================ @@ -423,7 +444,6 @@ class GridStats using Node2 = typename TreeT::Node2; // upper using RootT = typename TreeT::Node3; // root static_assert(std::is_same::value, "Mismatching type"); - static constexpr bool DO_STATS = StatsT::hasMinMax() || StatsT::hasAverage() || StatsT::hasStdDeviation(); ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta @@ -597,7 +617,7 @@ void GridStats::process(RootT &root) const Coord ijk = tile->origin(); total.bbox[0].minComponent(ijk); total.bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); - if (DO_STATS) { // resolved at compile time + if (StatsT::hasStats()) { // resolved at compile time total.stats.add(tile->value, ChildT::NUM_VALUES); } } @@ -629,7 +649,7 @@ GridStats::process(NodeT &node) if (const auto tileCount = data->mValueMask.countOn()) { //total.activeCount = tileCount * ChildT::NUM_VALUES; // active tiles for (auto it = data->mValueMask.beginOn(); it; ++it) { - if (DO_STATS) { // resolved at compile time + if (StatsT::hasStats()) { // resolved at compile time total.stats.add( data->mTable[*it].value, ChildT::NUM_VALUES ); } const Coord ijk = node.offsetToGlobalCoord(*it); @@ -668,7 +688,7 @@ GridStats::process(NodeT &node) data->mFlags &= ~uint32_t(2); // set 2nd bit off since node does not contain active values } else { data->mFlags |= uint32_t(2); // set 2nd bit on since node contains active values - if (DO_STATS) { // resolved at compile time + if (StatsT::hasStats()) { // resolved at compile time this->setStats(data, total.stats); this->setFlag(data->mMinimum, data->mMaximum, data->mFlags); } @@ -682,24 +702,15 @@ template typename GridStats::NodeStats GridStats::process(Node0 &leaf) { - static_assert(Node0::SIZE == 512u, "Invalid size of leaf nodes"); NodeStats local; - auto *data = leaf.data(); - if (auto activeCount 
= data->mValueMask.countOn()) { - //data->mFlags |= uint8_t(2); // sets 2nd bit on since leaf contains active voxel - //local.activeCount += activeCount; - leaf.updateBBox(); // optionally update active bounding box (updates data->mFlags) - local.bbox[0] = local.bbox[1] = data->mBBoxMin; - local.bbox[1] += Coord(data->mBBoxDif[0], data->mBBoxDif[1], data->mBBoxDif[2]); - if (DO_STATS) { // resolved at compile time - for (auto it = data->mValueMask.beginOn(); it; ++it) { - local.stats.add(data->getValue(*it)); - } - this->setStats(data, local.stats); - this->setFlag(data->getMin(), data->getMax(), data->mFlags); + if (leaf.updateBBox()) {// optionally update active bounding box (updates data->mFlags) + local.bbox[0] = local.bbox[1] = leaf.mBBoxMin; + local.bbox[1] += Coord(leaf.mBBoxDif[0], leaf.mBBoxDif[1], leaf.mBBoxDif[2]); + if (StatsT::hasStats()) {// resolved at compile time + for (auto it = leaf.cbeginValueOn(); it; ++it) local.stats.add(*it); + this->setStats(&leaf, local.stats); + this->setFlag(leaf.getMin(), leaf.getMax(), leaf.mFlags); } - } else { - data->mFlags &= ~uint8_t(2); // sets 2nd bit off since leaf has no bbox of active active values } return local; } // GridStats::process( LeafNode ) @@ -725,7 +736,7 @@ void gridStats(NanoGrid& grid, StatsMode mode) } else { throw std::runtime_error("gridStats: Unsupported statistics mode."); } -} +}// gridStats //================================================================================================ @@ -755,7 +766,8 @@ Mask getBBoxMask(const CoordBBox &bbox, const NodeT* node) } } return mask; -} +}// getBBoxMask + }// end of unnamed namespace /// @brief return the extrema of all the values in a grid that @@ -837,7 +849,6 @@ getExtrema(const NanoGrid& grid, const CoordBBox &bbox) return extrema; }// getExtrema - } // namespace nanovdb #endif // NANOVDB_GRIDSTATS_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/GridValidator.h b/nanovdb/nanovdb/util/GridValidator.h index 85234b03ad..c14d03040f 
100644 --- a/nanovdb/nanovdb/util/GridValidator.h +++ b/nanovdb/nanovdb/util/GridValidator.h @@ -59,8 +59,16 @@ std::string GridValidator::check(const GridT &grid, bool detailed) std::stringstream ss; if (!isValid(data)) { errorStr.assign("Grid is not 32B aligned"); - } else if (data->mMagic != NANOVDB_MAGIC_NUMBER) { - ss << "Incorrect magic number: Expected " << NANOVDB_MAGIC_NUMBER << ", but read " << data->mMagic; + } else if (data->mMagic != NANOVDB_MAGIC_NUMBER && data->mMagic != NANOVDB_MAGIC_GRID) { + const uint64_t magic1 = NANOVDB_MAGIC_NUMBER, magic2 = NANOVDB_MAGIC_GRID; + const char *c0 = (const char*)&(data->mMagic), *c1=(const char*)&magic1, *c2=(const char*)&magic2; + ss << "Incorrect magic number: Expected \""; + for (int i=0; i<8; ++i) ss << c1[i]; + ss << "\" or \""; + for (int i=0; i<8; ++i) ss << c2[i]; + ss << "\", but found \""; + for (int i=0; i<8; ++i) ss << c0[i]; + ss << "\""; errorStr = ss.str(); } else if (!validateChecksum(grid, detailed ? ChecksumMode::Full : ChecksumMode::Partial)) { errorStr.assign("Mis-matching checksum"); diff --git a/nanovdb/nanovdb/util/IO.h b/nanovdb/nanovdb/util/IO.h index f304411e7c..a962a3bcdb 100644 --- a/nanovdb/nanovdb/util/IO.h +++ b/nanovdb/nanovdb/util/IO.h @@ -13,6 +13,14 @@ multiple grid types. \note This file does NOT depend on OpenVDB, but optionally on ZIP and BLOSC + + \details NanoVDB files take on of two formats: + 1) multiple segments each with multiple grids (segments have easy to access metadata about its grids) + 2) starting with verion 32.6.0 nanovdb files also support a raw buffer with one or more grids (just a + dump of a raw grid buffer, so no new metadata). 
+ + // 1: Segment: FileHeader, MetaData0, gridName0...MetaDataN, gridNameN, compress Grid0,...compressed GridN + // 2: Raw: Grid0,...GridN */ #ifndef NANOVDB_IO_H_HAS_BEEN_INCLUDED @@ -20,7 +28,7 @@ #include #include "GridHandle.h" -#include "GridChecksum.h" +#include "GridChecksum.h"// for updateGridCount #include // for std::ifstream #include // for std::cerr/cout @@ -48,28 +56,60 @@ namespace nanovdb { namespace io { +// --------------------------> writeGrid(s) <------------------------------------ + +/// @brief Write a single grid to file (over-writing existing content of the file) +template +void writeGrid(const std::string& fileName, const GridHandle& handle, io::Codec codec = io::Codec::NONE, int verbose = 0); + +/// @brief Write multiple grids to file (over-writing existing content of the file) +template class VecT = std::vector> +void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec = Codec::NONE, int verbose = 0); + +// --------------------------> readGrid(s) <------------------------------------ + +/// @brief Read and return one or all grids from a file into a single GridHandle +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param n zero-based signed index of the grid to be read. +/// The default value of 0 means read only first grid. +/// A negative value of n means read all grids in the file. +/// @param verbose specify verbosity level. Default value of zero means quiet. 
+/// @param buffer optional buffer used for memory allocation +/// @return return a single GridHandle with one or all grids found in the file +/// @throw will throw a std::runtime_error if the file does not contain a grid with index n +template +GridHandle readGrid(const std::string& fileName, int n = 0, int verbose = 0, const BufferT& buffer = BufferT()); + +/// @brief Read and return the first grid with a specific name from a file +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param gridName string name of the grid to be read +/// @param verbose specify verbosity level. Default value of zero means quiet. +/// @param buffer optional buffer used for memory allocation +/// @return return a single GridHandle containing the grid with the specific name +/// @throw will throw a std::runtime_error if the file does not contain a grid with the specific name +template +GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose = 0, const BufferT& buffer = BufferT()); + +/// @brief Read all the grids in the file and return them as a vector of multiple GridHandles, each containing +/// all grids encoded in the same segment of the file (i.e. they where written together) +/// @tparam BufferT Type of buffer used memory allocation +/// @param fileName string name of file to be read from +/// @param verbose specify verbosity level. Default value of zero means quiet. +/// @param buffer optional buffer used for memory allocation +/// @return Return a vector of GridHandles each containing all grids encoded +/// in the same segment of the file (i.e. they where written together). 
+template class VecT = std::vector> +VecT> readGrids(const std::string& fileName, int verbose = 0, const BufferT& buffer = BufferT()); + +// ----------------------------------------------------------------------- + /// We fix a specific size for counting bytes in files so that they /// are saved the same regardless of machine precision. (Note there are /// still little/bigendian issues, however) using fileSize_t = uint64_t; -/// @brief Optional compression codecs -/// -/// @note NONE is the default, ZIP is slow but compact and BLOSC offers a great balance. -/// -/// @warning NanoVDB optionally supports ZIP and BLOSC compression and will throw an exception -/// if it support is required but missing. -enum class Codec : uint16_t { NONE = 0, - ZIP = 1, - BLOSC = 2, - END = 3 }; - -inline __hostdev__ const char* toStr(Codec codec) -{ - static const char * LUT[] = { "NONE", "ZIP", "BLOSC" , "END" }; - return LUT[static_cast(codec)]; -} - /// @brief Internal functions for compressed read/write of a NanoVDB GridHandle into a stream /// /// @warning These functions should never be called directly by client code @@ -90,10 +130,7 @@ static void read(std::istream& is, char* data, fileSize_t size, Codec codec); uint64_t stringHash(const char* cstr); /// @brief Return a uint64_t hash key of a std::string -inline uint64_t stringHash(const std::string& str) -{ - return stringHash(str.c_str()); -} +inline uint64_t stringHash(const std::string& str){return stringHash(str.c_str());} /// @brief Return a uint64_t with its bytes reversed so we can check for endianness inline uint64_t reverseEndianness(uint64_t val) @@ -104,82 +141,41 @@ inline uint64_t reverseEndianness(uint64_t val) (((val) << 40) & 0x00FF000000000000) | (((val) << 56) & 0xFF00000000000000); } -/// @brief Data encoded at the head of each segment of a file or stream. 
+/// @brief This class defines the meta data stored for each grid in a segment /// -/// @note A file or stream is composed of one or more segments that each contain -// one or more grids. -// Magic number of NanoVDB files (uint64_t) | -// Version numbers of this file (uint32_t) | one header for each segment -// Number of grids in this segment (uint16_t) | -// Compression mode (uint16_t) | -struct Header -{// 16 bytes - uint64_t magic; // 8 bytes - Version version;// 4 bytes version numbers - uint16_t gridCount; // 2 bytes - Codec codec; // 2 bytes - Header(Codec c = Codec::NONE) - : magic(NANOVDB_MAGIC_NUMBER) // Magic number: "NanoVDB" in hex - , version()// major, minor and patch version numbers - , gridCount(0) - , codec(c) - { - } -}; // Header ( 16 bytes = 2 words ) - -/// @brief Data encoded for each of the grids associated with a segment. -// Grid size in memory (uint64_t) | -// Grid size on disk (uint64_t) | -// Grid name hash key (uint64_t) | -// Numer of active voxels (uint64_t) | -// Grid type (uint32_t) | -// Grid class (uint32_t) | -// Characters in grid name (uint32_t) | -// AABB in world space (2*3*double) | one per grid in file -// AABB in index space (2*3*int) | -// Size of a voxel in world units (3*double) | -// Byte size of the grid name (uint32_t) | -// Number of nodes per level (4*uint32_t) | -// Numer of active tiles per level (3*uint32_t) | -// Codec for file compression (uint16_t) | -// Padding due to 8B alignment (uint16_t) | -// Version number (uint32_t) | -struct MetaData -{// 176 bytes - uint64_t gridSize, fileSize, nameKey, voxelCount; // 4 * 8 = 32B. - GridType gridType; // 4B. - GridClass gridClass; // 4B. - BBox worldBBox; // 2 * 3 * 8 = 48B. - CoordBBox indexBBox; // 2 * 3 * 4 = 24B. - Vec3d voxelSize; // 24B. - uint32_t nameSize; // 4B. 
- uint32_t nodeCount[4]; //4 x 4 = 16B - uint32_t tileCount[3];// 3 x 4 = 12B - Codec codec; // 2B - uint16_t padding;// 2B, due to 8B alignment from uint64_t - Version version;// 4B -}; // MetaData - -struct GridMetaData : public MetaData +/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData +/// each followed by grid names and then finally the grids themselves. +/// +/// @note This class should not be confused with nanovdb::GridMetaData defined in NanoVDB.h +/// Also, FileMetaData is defined in NanoVDB.h. +struct FileGridMetaData : public FileMetaData { - static_assert(sizeof(MetaData) == 176, "Unexpected sizeof(MetaData)"); + static_assert(sizeof(FileMetaData) == 176, "Unexpected sizeof(FileMetaData)"); std::string gridName; void read(std::istream& is); void write(std::ostream& os) const; - GridMetaData() {} + FileGridMetaData() {} template - GridMetaData(uint64_t size, Codec c, const NanoGrid& grid); - uint64_t memUsage() const { return sizeof(MetaData) + nameSize; } -}; // GridMetaData + FileGridMetaData(uint64_t size, Codec c, const NanoGrid& grid); + uint64_t memUsage() const { return sizeof(FileMetaData) + nameSize; } +}; // FileGridMetaData +/// @brief This class defines all the data stored in segment of a file +/// +/// @details A segment consists of a FileHeader followed by a list of FileGridMetaData +/// each followed by grid names and then finally the grids themselves. 
struct Segment { - // Check assumptions made during read and write of Header and MetaData - static_assert(sizeof(Header) == 16u, "Unexpected sizeof(Header)"); - Header header; - std::vector meta; + // Check assumptions made during read and write of FileHeader and FileMetaData + static_assert(sizeof(FileHeader) == 16u, "Unexpected sizeof(FileHeader)"); + FileHeader header;// defined in NanoVDB.h + std::vector meta;// defined in NanoVDB.h Segment(Codec c = Codec::NONE) - : header(c) +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + : header{NANOVDB_MAGIC_FILE, Version(), 0u, c} +#else + : header{NANOVDB_MAGIC_NUMBER, Version(), 0u, c} +#endif , meta() { } @@ -190,56 +186,6 @@ struct Segment uint64_t memUsage() const; }; // Segment -/// @brief Write a single grid to file (over-writing existing content of the file) -template -void writeGrid(const std::string& fileName, const GridHandle& handle, Codec codec = Codec::NONE, int verbose = 0); - -/// @brief Write a single grid to stream (starting at the current position) -/// -/// @note This method can be used to append grid to an existing stream -template -void writeGrid(std::ostream& os, const GridHandle& handle, Codec codec = Codec::NONE); - -/// @brief Write multiple grids to file (over-writing existing content of the file) -template class VecT = std::vector> -void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec = Codec::NONE, int verbose = 0); - -/// @brief Writes multiple grids to stream (starting at its current position) -/// -/// @note This method can be used to append multiple grids to an existing stream -template class VecT = std::vector> -void writeGrids(std::ostream& os, const VecT>& handles, Codec codec = Codec::NONE); - -/// @brief Read the n'th grid from file (defaults to first grid) -/// -/// @throw If n exceeds the number of grids in the file -template -GridHandle readGrid(const std::string& fileName, uint64_t n = 0, int verbose = 0, const BufferT& buffer = BufferT()); - -/// @brief Read the 
n'th grid from stream (defaults to first grid) -/// -/// @throw If n exceeds the number of grids in the stream -template -GridHandle readGrid(std::istream& is, uint64_t n = 0, const BufferT& buffer = BufferT()); - -/// @brief Read the first grid with a specific name -/// -/// @warning If not grid exists with the specified name the resulting GridHandle is empty -template -GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose = 0, const BufferT& buffer = BufferT()); - -/// @brief Read the first grid with a specific name -template -GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& buffer = BufferT()); - -/// @brief Read all the grids in the file -template class VecT = std::vector> -VecT> readGrids(const std::string& fileName, int verbose = 0, const BufferT& buffer = BufferT()); - -/// @brief Real all grids at the current position of the input stream -template class VecT = std::vector> -VecT> readGrids(std::istream& is, const BufferT& buffer = BufferT()); - /// @brief Return true if the file contains a grid with the specified name bool hasGrid(const std::string& fileName, const std::string& gridName); @@ -247,10 +193,10 @@ bool hasGrid(const std::string& fileName, const std::string& gridName); bool hasGrid(std::istream& is, const std::string& gridName); /// @brief Reads and returns a vector of meta data for all the grids found in the specified file -std::vector readGridMetaData(const std::string& fileName); +std::vector readGridMetaData(const std::string& fileName); /// @brief Reads and returns a vector of meta data for all the grids found in the specified stream -std::vector readGridMetaData(std::istream& is); +std::vector readGridMetaData(std::istream& is); // --------------------------> Implementations for Internal <------------------------------------ @@ -313,6 +259,11 @@ void Internal::read(std::istream& is, BufferT& buffer, Codec codec) Internal::read(is, reinterpret_cast(buffer.data()), 
buffer.size(), codec); } // Internal::read +/// @brief read compressed grid from stream +/// @param is input stream to read from +/// @param data data buffer to write into +/// @param residual expected size of uncompressed data +/// @param codec mode of compression void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec codec) { // read tree using optional compression @@ -324,11 +275,9 @@ void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec cod std::unique_ptr tmp(new Bytef[size]); is.read(reinterpret_cast(tmp.get()), size); uLongf numBytes = residual; - int status = uncompress(reinterpret_cast(data), &numBytes, tmp.get(), static_cast(size)); - if (status != Z_OK) - std::runtime_error("Internal read error in ZIP"); - if (fileSize_t(numBytes) != residual) - throw std::runtime_error("UNZIP failed on byte size"); + int status = uncompress(reinterpret_cast(data), &numBytes, tmp.get(), static_cast(size)); + if (status != Z_OK) std::runtime_error("Internal read error in ZIP"); + if (fileSize_t(numBytes) != residual) throw std::runtime_error("UNZIP failed on byte size"); #else throw std::runtime_error("ZIP compression codec was disabled during build"); #endif @@ -356,61 +305,61 @@ void Internal::read(std::istream& is, char* data, fileSize_t residual, Codec cod break; } default: - is.read(data, residual); + is.read(data, residual);// read uncompressed data } if (!is) throw std::runtime_error("Failed to read Tree from file"); } // Internal::read -// --------------------------> Implementations for GridMetaData <------------------------------------ +// --------------------------> Implementations for FileGridMetaData <------------------------------------ template -inline GridMetaData::GridMetaData(uint64_t size, Codec c, const NanoGrid& grid) - : MetaData{size, // gridSize - 0, // fileSize - 0, // nameKey - grid.activeVoxelCount(), // voxelCount - grid.gridType(), // gridType - grid.gridClass(), // gridClass - grid.worldBBox(), // 
worldBBox - grid.tree().bbox(), // indexBBox - grid.voxelSize(), // voxelSize - 0, // nameSize - {0, 0, 0, 1}, // nodeCount[4] - {0, 0, 0}, // tileCount[3] - c, // codec - 0, // padding - Version()}// version +inline FileGridMetaData::FileGridMetaData(uint64_t size, Codec c, const NanoGrid& grid) + : FileMetaData{size, // gridSize + size, // fileSize (will typically be redefined) + 0u, // nameKey + grid.activeVoxelCount(), // voxelCount + grid.gridType(), // gridType + grid.gridClass(), // gridClass + grid.worldBBox(), // worldBBox + grid.tree().bbox(), // indexBBox + grid.voxelSize(), // voxelSize + 0, // nameSize + {0, 0, 0, 1}, // nodeCount[4] + {0, 0, 0}, // tileCount[3] + c, // codec + 0, // padding + Version()}// version , gridName(grid.gridName()) { nameKey = stringHash(gridName); nameSize = static_cast(gridName.size() + 1); // include '\0' const uint32_t* ptr = reinterpret_cast(&grid.tree())->mNodeCount; - for (int i = 0; i < 3; ++i) MetaData::nodeCount[i] = *ptr++; - for (int i = 0; i < 3; ++i) MetaData::tileCount[i] = *ptr++; -}// GridMetaData::GridMetaData + for (int i = 0; i < 3; ++i) FileMetaData::nodeCount[i] = *ptr++; + for (int i = 0; i < 3; ++i) FileMetaData::tileCount[i] = *ptr++; +}// FileGridMetaData::FileGridMetaData -inline void GridMetaData::write(std::ostream& os) const +inline void FileGridMetaData::write(std::ostream& os) const { - os.write(reinterpret_cast(this), sizeof(MetaData)); + os.write(reinterpret_cast(this), sizeof(FileMetaData)); os.write(gridName.c_str(), nameSize); - if (!os) throw std::runtime_error("Failed writing GridMetaData"); -}// GridMetaData::write + if (!os) throw std::runtime_error("Failed writing FileGridMetaData"); +}// FileGridMetaData::write -inline void GridMetaData::read(std::istream& is) +inline void FileGridMetaData::read(std::istream& is) { - is.read(reinterpret_cast(this), sizeof(MetaData)); + is.read(reinterpret_cast(this), sizeof(FileMetaData)); std::unique_ptr tmp(new char[nameSize]); 
is.read(reinterpret_cast(tmp.get()), nameSize); gridName.assign(tmp.get()); - if (!is) throw std::runtime_error("Failed reading GridMetaData"); -}// GridMetaData::read + if (!is) throw std::runtime_error("Failed reading FileGridMetaData"); +}// FileGridMetaData::read // --------------------------> Implementations for Segment <------------------------------------ inline uint64_t Segment::memUsage() const { - uint64_t sum = sizeof(Header); - for (auto& m : meta) sum += m.memUsage(); + uint64_t sum = sizeof(FileHeader); + for (auto& m : meta) sum += m.memUsage();// includes FileMetaData + grid name return sum; }// Segment::memUsage @@ -473,26 +422,34 @@ inline void Segment::write(std::ostream& os) const { if (header.gridCount == 0) { throw std::runtime_error("Segment contains no grids"); - } else if (!os.write(reinterpret_cast(&header), sizeof(Header))) { - throw std::runtime_error("Failed to write Header of Segment"); + } else if (!os.write(reinterpret_cast(&header), sizeof(FileHeader))) { + throw std::runtime_error("Failed to write FileHeader of Segment"); } for (auto& m : meta) m.write(os); }// Segment::write inline bool Segment::read(std::istream& is) { - is.read(reinterpret_cast(&header), sizeof(Header)); - if (is.eof()) { + is.read(reinterpret_cast(&header), sizeof(FileHeader)); + if (is.eof()) {// The EOF flag is only set once a read tries to read past the end of the file + is.clear(std::ios_base::eofbit);// clear eof flag so we can rewind and read again return false; } - if (!is || header.magic != NANOVDB_MAGIC_NUMBER) { + if (!header.isValid()) { // first check for byte-swapped header magic. 
- if (header.magic == reverseEndianness(NANOVDB_MAGIC_NUMBER)) + if (header.magic == reverseEndianness(NANOVDB_MAGIC_NUMBER) || + header.magic == reverseEndianness(NANOVDB_MAGIC_FILE)) { throw std::runtime_error("This nvdb file has reversed endianness"); - throw std::runtime_error("Magic number error: This is not a valid nvdb file"); - } else if ( header.version.getMajor() != NANOVDB_MAJOR_VERSION_NUMBER) { + } else { + throw std::runtime_error("Magic number error: This is not a valid nvdb file"); + } + } else if ( !header.version.isCompatible()) { std::stringstream ss; - if (header.version.getMajor() < NANOVDB_MAJOR_VERSION_NUMBER) { + Version v; + is.read(reinterpret_cast(&v), sizeof(Version));// read GridData::mVersion located at byte 16=sizeof(FileHeader) is stream + if ( v.getMajor() == NANOVDB_MAJOR_VERSION_NUMBER) { + ss << "This file looks like it contains a raw grid buffer and not a standard file with meta data"; + } else if ( header.version.getMajor() < NANOVDB_MAJOR_VERSION_NUMBER) { ss << "The file contains an older version of NanoVDB: " << std::string(header.version.c_str()) << "!\n\t" << "Recommendation: Re-generate this NanoVDB file with this version: " << NANOVDB_MAJOR_VERSION_NUMBER << ".X of NanoVDB"; } else { @@ -509,93 +466,166 @@ inline bool Segment::read(std::istream& is) return true; }// Segment::read -// --------------------------> Implementations for read/write <------------------------------------ - -template -void writeGrid(const std::string& fileName, const GridHandle& handle, Codec codec, int verbose) -{ - std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); - if (!os.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for output"); - } - writeGrid(os, handle, codec); - if (verbose) { - std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl; - } -}// writeGrid +// --------------------------> writeGrid <------------------------------------ 
template void writeGrid(std::ostream& os, const GridHandle& handle, Codec codec) { Segment seg(codec); seg.add(handle); - const uint64_t headerSize = seg.memUsage(); - std::streamoff seek = headerSize; - seg.write(os); // write header without the correct fileSize + const auto start = os.tellp(); + seg.write(os); // write header without the correct fileSize (so it's allocated) for (uint32_t i = 0; i < handle.gridCount(); ++i) { seg.meta[i].fileSize = Internal::write(os, handle, codec, i); - seek += seg.meta[i].fileSize; } - os.seekp(-seek, std::ios_base::cur); // rewind to start of stream - seg.write(os); // rewrite header with the correct fileSize - os.seekp(seek - headerSize, std::ios_base::cur); // skip to end + os.seekp(start); + seg.write(os);// re-write header with the correct fileSize + os.seekp(0, std::ios_base::end);// skip to end }// writeGrid -template class VecT> -void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec, int verbose) +template +void writeGrid(const std::string& fileName, const GridHandle& handle, Codec codec, int verbose) { std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); if (!os.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for output"); + throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); } - writeGrids(os, handles, codec); + writeGrid(os, handle, codec); if (verbose) { - std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; + std::cout << "Wrote nanovdb::Grid to file named \"" << fileName << "\"" << std::endl; } +}// writeGrid + +// --------------------------> writeGrids <------------------------------------ + +template class VecT = std::vector> +void writeGrids(std::ostream& os, const VecT>& handles, Codec codec = Codec::NONE) +{ + for (auto& h : handles) writeGrid(os, h, codec); }// writeGrids template class VecT> -void 
writeGrids(std::ostream& os, const VecT>& handles, Codec codec) +void writeGrids(const std::string& fileName, const VecT>& handles, Codec codec, int verbose) { - for (auto& h : handles) writeGrid(os, h, codec); + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + writeGrids(os, handles, codec); + if (verbose) std::cout << "Wrote " << handles.size() << " nanovdb::Grid(s) to file named \"" << fileName << "\"" << std::endl; }// writeGrids +// --------------------------> readGrid <------------------------------------ + +template +GridHandle readGrid(std::istream& is, int n, const BufferT& pool) +{ + GridHandle handle; + if (n<0) {// read all grids into the same buffer + try {//first try to read a raw grid buffer + handle.read(is, pool); + } catch(const std::logic_error&) { + Segment seg; + uint64_t bufferSize = 0u; + uint32_t gridCount = 0u, gridIndex = 0u; + const auto start = is.tellg(); + while (seg.read(is)) { + std::streamoff skipSize = 0; + for (auto& m : seg.meta) { + ++gridCount; + bufferSize += m.gridSize; + skipSize += m.fileSize; + }// loop over grids in segment + is.seekg(skipSize, std::ios_base::cur); // skip forward from the current position + }// loop over segments + auto buffer = BufferT::create(bufferSize, &pool); + char *ptr = (char*)buffer.data(); + is.seekg(start);// rewind + while (seg.read(is)) { + for (auto& m : seg.meta) { + Internal::read(is, ptr, m.gridSize, seg.header.codec); + updateGridCount((GridData*)ptr, gridIndex++, gridCount); + ptr += m.gridSize; + }// loop over grids in segment + }// loop over segments + return GridHandle(std::move(buffer)); + } + } else {// read a specific grid + try {//first try to read a raw grid buffer + handle.read(is, uint32_t(n), pool); + updateGridCount((GridData*)handle.data(), 0u, 1u); + } catch(const std::logic_error&) { + Segment seg; + int counter = -1; + while 
(seg.read(is)) { + std::streamoff seek = 0; + for (auto& m : seg.meta) { + if (++counter == n) { + auto buffer = BufferT::create(m.gridSize, &pool); + Internal::read(is, buffer, seg.header.codec); + updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } + }// loop over grids in segment + is.seekg(seek, std::ios_base::cur); // skip forward from the current position + }// loop over segments + if (n != counter) throw std::runtime_error("stream does not contain a #" + std::to_string(n) + " grid"); + } + } + return handle; +}// readGrid + /// @brief Read the n'th grid template -GridHandle readGrid(const std::string& fileName, uint64_t n, int verbose, const BufferT& buffer) +GridHandle readGrid(const std::string& fileName, int n, int verbose, const BufferT& buffer) { std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); - } + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); auto handle = readGrid(is, n, buffer); if (verbose) { - std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; + if (n<0) { + std::cout << "Read all NanoGrids from the file named \"" << fileName << "\"" << std::endl; + } else { + std::cout << "Read NanoGrid # " << n << " from the file named \"" << fileName << "\"" << std::endl; + } } return handle; // is converted to r-value and return value is move constructed. 
}// readGrid +/// @brief Read a specific grid from an input stream given the name of the grid +/// @tparam BufferT Buffer type used for allocation +/// @param is input stream from which to read the grid +/// @param gridName string name of the (first) grid to be returned +/// @param pool optional memory pool from which to allocate the grid buffer +/// @return Return the first grid in the input stream with a specific name +/// @throw std::runtime_error with no grid exists with the specified name template -GridHandle readGrid(std::istream& is, uint64_t n, const BufferT& pool) +GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) { - Segment seg; - uint64_t counter = 0; - while (seg.read(is)) { - std::streamoff seek = 0; - for (auto& m : seg.meta) { - if (counter == n) { - auto buffer = BufferT::create(m.gridSize, &pool); - is.seekg(seek, std::ios_base::cur); // skip forward from the current position - Internal::read(is, buffer, seg.header.codec); - return GridHandle(std::move(buffer)); - } else { - seek += m.fileSize; + try { + GridHandle handle; + handle.read(is, gridName, pool); + return handle; + } catch(const std::logic_error&) { + const auto key = stringHash(gridName); + Segment seg; + while (seg.read(is)) {// loop over all segments in stream + std::streamoff seek = 0; + for (auto& m : seg.meta) {// loop over all grids in segment + if ((m.nameKey == 0u || m.nameKey == key) && m.gridName == gridName) { // check for hash key collision + auto buffer = BufferT::create(m.gridSize, &pool); + is.seekg(seek, std::ios_base::cur); // rewind + Internal::read(is, buffer, seg.header.codec); + updateGridCount((GridData*)buffer.data(), 0u, 1u); + return GridHandle(std::move(buffer)); + } else { + seek += m.fileSize; + } } - ++counter; + is.seekg(seek, std::ios_base::cur); // skip forward from the current position } - is.seekg(seek, std::ios_base::cur); // skip forward from the current position } - throw std::runtime_error("Grid index " + 
std::to_string(n) + "exceeds grid count (" + std::to_string(counter) + ") in file"); + throw std::runtime_error("Grid name '" + gridName + "' not found in file"); }// readGrid /// @brief Read the first grid with a specific name @@ -603,9 +633,7 @@ template GridHandle readGrid(const std::string& fileName, const std::string& gridName, int verbose, const BufferT& buffer) { std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); - } + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); auto handle = readGrid(is, gridName, buffer); if (verbose) { if (handle) { @@ -617,45 +645,10 @@ GridHandle readGrid(const std::string& fileName, const std::string& gri return handle; // is converted to r-value and return value is move constructed. }// readGrid -template -GridHandle readGrid(std::istream& is, const std::string& gridName, const BufferT& pool) -{ - const auto key = stringHash(gridName); - Segment seg; - while (seg.read(is)) { - std::streamoff seek = 0; - for (auto& m : seg.meta) { - if (m.nameKey == key && m.gridName == gridName) { // check for hash key collision - auto buffer = BufferT::create(m.gridSize, &pool); - is.seekg(seek, std::ios_base::cur); // rewind - Internal::read(is, buffer, seg.header.codec); - return GridHandle(std::move(buffer)); - } else { - seek += m.fileSize; - } - } - is.seekg(seek, std::ios_base::cur); // skip forward from the current position - } - throw std::runtime_error("Grid name '" + gridName + "' not found in file"); -}// readGrid +// --------------------------> readGrids <------------------------------------ -/// @brief Read all the grids -template class VecT> -VecT> readGrids(const std::string& fileName, int verbose, const BufferT& buffer) -{ - std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open 
file named \"" + fileName + "\" for input"); - } - auto handles = readGrids(is, buffer); - if (verbose) { - std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; - } - return handles; // is converted to r-value and return value is move constructed. -}// readGrids - -template class VecT> -VecT> readGrids(std::istream& is, const BufferT& pool) +template class VecT = std::vector> +VecT> readGrids(std::istream& is, const BufferT& pool = BufferT()) { VecT> handles; Segment seg; @@ -665,58 +658,64 @@ VecT> readGrids(std::istream& is, const BufferT& pool) auto buffer = BufferT::create(bufferSize, &pool); uint64_t bufferOffset = 0; for (uint16_t i = 0; i < seg.header.gridCount; ++i) { - Internal::read(is, reinterpret_cast(buffer.data()) + bufferOffset, seg.meta[i].gridSize, seg.header.codec); - - // The following three lines provide backwards compatibility with older files - // that were written using writeGrids. auto *data = reinterpret_cast(buffer.data() + bufferOffset); - data->mGridIndex = static_cast(i); - data->mGridCount = static_cast(seg.header.gridCount); - + Internal::read(is, (char*)data, seg.meta[i].gridSize, seg.header.codec); + updateGridCount(data, uint32_t(i), uint32_t(seg.header.gridCount)); bufferOffset += seg.meta[i].gridSize; - } + }// loop over grids in segment handles.emplace_back(std::move(buffer)); // force move copy assignment - } - - // The following two lines provide backwards compatibility with older files - // that were written using writeGrids. Since we (force) updated the mGridIndex - // and mGridCount above, we need to recompute the checksum as well. - for (auto& handle : handles) - updateChecksum(handle); + }// loop over segments + return handles; // is converted to r-value and return value is move constructed. 
+}// readGrids +/// @brief Read all the grids +template class VecT> +VecT> readGrids(const std::string& fileName, int verbose, const BufferT& buffer) +{ + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + auto handles = readGrids(is, buffer); + if (verbose) std::cout << "Read " << handles.size() << " NanoGrid(s) from the file named \"" << fileName << "\"" << std::endl; return handles; // is converted to r-value and return value is move constructed. }// readGrids -inline std::vector readGridMetaData(const std::string& fileName) +// --------------------------> readGridMetaData <------------------------------------ + +inline std::vector readGridMetaData(const std::string& fileName) { std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); - } + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); return readGridMetaData(is); // is converted to r-value and return value is move constructed. 
}// readGridMetaData -inline std::vector readGridMetaData(std::istream& is) +inline std::vector readGridMetaData(std::istream& is) { - std::vector meta; Segment seg; - while (seg.read(is)) { - std::streamoff seek = 0; - for (auto& m : seg.meta) { - meta.push_back(m); - seek += m.fileSize; - } - is.seekg(seek, std::ios_base::cur); + std::vector meta; + try { + GridHandle<> handle;// if stream contains a raw grid buffer we unfortunately have to load everything + handle.read(is); + seg.add(handle); + meta = std::move(seg.meta); + } catch(const std::logic_error&) { + while (seg.read(is)) { + std::streamoff skip = 0; + for (auto& m : seg.meta) { + meta.push_back(m); + skip += m.fileSize; + }// loop over grid meta data in segment + is.seekg(skip, std::ios_base::cur); + }// loop over segments } return meta; // is converted to r-value and return value is move constructed. }// readGridMetaData +// --------------------------> hasGrid <------------------------------------ + inline bool hasGrid(const std::string& fileName, const std::string& gridName) { std::ifstream is(fileName, std::ios::in | std::ios::binary); - if (!is.is_open()) { - throw std::runtime_error("Unable to open file named \"" + fileName + "\" for input"); - } + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); return hasGrid(is, gridName); }// hasGrid @@ -729,17 +728,19 @@ inline bool hasGrid(std::istream& is, const std::string& gridName) for (auto& m : seg.meta) { if (m.nameKey == key && m.gridName == gridName) return true; // check for hash key collision seek += m.fileSize; - } + }// loop over grid meta data in segment is.seekg(seek, std::ios_base::cur); - } + }// loop over segments return false; }// hasGrid -inline uint64_t stringHash(const char* cstr) +// --------------------------> stringHash <------------------------------------ + +inline uint64_t stringHash(const char* c_str) { - uint64_t hash = 0; - if (cstr) { - for (auto* str = 
reinterpret_cast(cstr); *str; ++str) { + uint64_t hash = 0;// zero is returned when cstr = nullptr or "\0" + if (c_str) { + for (auto* str = reinterpret_cast(c_str); *str; ++str) { uint64_t overflow = hash >> (64 - 8); hash *= 67; // Next-ish prime after 26 + 26 + 10 hash += *str + overflow; diff --git a/nanovdb/nanovdb/util/NodeManager.h b/nanovdb/nanovdb/util/NodeManager.h index 5c15594da4..821c220372 100644 --- a/nanovdb/nanovdb/util/NodeManager.h +++ b/nanovdb/nanovdb/util/NodeManager.h @@ -53,21 +53,26 @@ struct NodeManagerData template class NodeManagerHandle { - BufferT mBuffer; + GridType mGridType{GridType::Unknown}; + BufferT mBuffer; template - const NodeManager* getMgr() const; + const NodeManager* getMgr() const { + return mGridType == mapToGridType() ? (const NodeManager*)mBuffer.data() : nullptr; + } template - typename std::enable_if::hasDeviceDual, const NodeManager*>::type - getDeviceMgr() const; + typename enable_if::hasDeviceDual, const NodeManager*>::type + getDeviceMgr() const { + return mGridType == mapToGridType() ? 
(const NodeManager*)mBuffer.deviceData() : nullptr; + } template static T* no_const(const T* ptr) { return const_cast(ptr); } public: /// @brief Move constructor from a buffer - NodeManagerHandle(BufferT&& buffer) { mBuffer = std::move(buffer); } + NodeManagerHandle(GridType gridType, BufferT&& buffer) : mGridType(gridType) { mBuffer = std::move(buffer); } /// @brief Empty ctor NodeManagerHandle() = default; /// @brief Disallow copy-construction @@ -75,13 +80,18 @@ class NodeManagerHandle /// @brief Disallow copy assignment operation NodeManagerHandle& operator=(const NodeManagerHandle&) = delete; /// @brief Move copy assignment operation - NodeManagerHandle& operator=(NodeManagerHandle&& other) noexcept - { + NodeManagerHandle& operator=(NodeManagerHandle&& other) noexcept { + mGridType = other.mGridType; mBuffer = std::move(other.mBuffer); + other.mGridType = GridType::Unknown; return *this; } /// @brief Move copy-constructor - NodeManagerHandle(NodeManagerHandle&& other) noexcept { mBuffer = std::move(other.mBuffer); } + NodeManagerHandle(NodeManagerHandle&& other) noexcept { + mGridType = other.mGridType; + mBuffer = std::move(other.mBuffer); + other.mGridType = GridType::Unknown; + } /// @brief Default destructor ~NodeManagerHandle() { this->reset(); } /// @brief clear the buffer @@ -122,21 +132,21 @@ class NodeManagerHandle /// /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! template - typename std::enable_if::hasDeviceDual, const NodeManager*>::type + typename enable_if::hasDeviceDual, const NodeManager*>::type deviceMgr() const { return this->template getDeviceMgr(); } /// @brief Return a const pointer to the NodeManager encoded in this NodeManagerHandle on the device, e.g. GPU /// /// @warning Note that the return pointer can be NULL if the template parameter does not match the specified grid! 
template - typename std::enable_if::hasDeviceDual, NodeManager*>::type + typename enable_if::hasDeviceDual, NodeManager*>::type deviceMgr() { return no_const(this->template getDeviceMgr()); } /// @brief Upload the NodeManager to the device, e.g. from CPU to GPU /// /// @note This method is only available if the buffer supports devices template - typename std::enable_if::hasDeviceDual, void>::type + typename enable_if::hasDeviceDual, void>::type deviceUpload(void* deviceGrid, void* stream = nullptr, bool sync = true) { assert(deviceGrid); @@ -151,7 +161,7 @@ class NodeManagerHandle /// /// @note This method is only available if the buffer supports devices template - typename std::enable_if::hasDeviceDual, void>::type + typename enable_if::hasDeviceDual, void>::type deviceDownload(void* stream = nullptr, bool sync = true) { auto *data = reinterpret_cast(mBuffer.data()); @@ -161,25 +171,6 @@ class NodeManagerHandle } };// NodeManagerHandle -template -template -inline const NodeManager* NodeManagerHandle::getMgr() const -{ - using T = const NodeManager*; - T mgr = reinterpret_cast(mBuffer.data());// host - return mgr && mgr->grid().gridType() == mapToGridType() ? mgr : nullptr; -} - -template -template -inline typename std::enable_if::hasDeviceDual, const NodeManager*>::type -NodeManagerHandle::getDeviceMgr() const -{ - using T = const NodeManager*; - T mgr = reinterpret_cast(mBuffer.data());// host - return mgr && mgr->grid().gridType() == mapToGridType() ? 
reinterpret_cast(mBuffer.deviceData()) : nullptr; -} - /// @brief This class allows for sequential access to nodes in a NanoVDB tree /// /// @details Nodes are always arranged breadth first during sequential access of nodes @@ -196,9 +187,9 @@ class NodeManager : private NodeManagerData using Node2 = NodeT<2>;// upper internal node using Node1 = NodeT<1>;// lower internal node using Node0 = NodeT<0>;// leaf node - static constexpr bool FIXED_SIZE = Node0::FIXED_SIZE && Node1::FIXED_SIZE && Node2::FIXED_SIZE; public: + static constexpr bool FIXED_SIZE = Node0::FIXED_SIZE && Node1::FIXED_SIZE && Node2::FIXED_SIZE; NodeManager(const NodeManager&) = delete; NodeManager(NodeManager&&) = delete; @@ -218,7 +209,7 @@ class NodeManager : private NodeManagerData __hostdev__ static uint64_t memUsage(const GridT &grid) { uint64_t size = sizeof(NodeManagerData); if (!NodeManager::isLinear(grid)) { - const uint32_t *p = grid.tree().data()->mNodeCount; + const uint32_t *p = grid.tree().mNodeCount; size += sizeof(int64_t)*(p[0]+p[1]+p[2]); } return size; @@ -293,12 +284,15 @@ template NodeManagerHandle createNodeManager(const NanoGrid &grid, const BufferT& buffer) { - NodeManagerHandle handle(BufferT::create(NodeManager::memUsage(grid), &buffer)); + NodeManagerHandle handle(mapToGridType(), BufferT::create(NodeManager::memUsage(grid), &buffer)); auto *data = reinterpret_cast(handle.data()); NANOVDB_ASSERT(isValid(data)); - data->mMagic = NANOVDB_MAGIC_NUMBER; - data->mGrid = const_cast*>(&grid); - data->mPadding = 0; + NANOVDB_ASSERT(mapToGridType() == grid.gridType()); +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + *data = NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)&grid, {0u,0u,0u}}; +#else + *data = NodeManagerData{NANOVDB_MAGIC_NUMBER, 0u, (void*)&grid, {0u,0u,0u}}; +#endif if (NodeManager::isLinear(grid)) { data->mLinear = uint8_t(1u); @@ -326,4 +320,8 @@ NodeManagerHandle createNodeManager(const NanoGrid &grid, } // namespace nanovdb +#if defined(__CUDACC__) +#include +#endif// 
defined(__CUDACC__) + #endif // NANOVDB_NODEMANAGER_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh index 42a6e7f20c..c750412458 100644 --- a/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaAddBlindData.cuh @@ -21,12 +21,26 @@ #include "CudaDeviceBuffer.h" #include #include +#include +#include #include // for std::strcpy namespace nanovdb { /// @brief This function appends blind data to and existing NanoGrid +/// @tparam BuildT Build type of the grid +/// @tparam BlindDataT Type of the blind data +/// @tparam BufferT Type of the buffer used for allocation +/// @param d_grid Pointer to device grid +/// @param d_blindData Pointer to device blind data +/// @param valueCount number of values in the blind data +/// @param blindClass class of the blind data +/// @param semantics semantics of the blind data +/// @param name optional name of the blind data +/// @param pool optional pool used for allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) +/// @return GridHandle with blind data appended template GridHandle cudaAddBlindData(const NanoGrid *d_grid, @@ -35,7 +49,8 @@ cudaAddBlindData(const NanoGrid *d_grid, GridBlindDataClass blindClass = GridBlindDataClass::Unknown, GridBlindDataSemantic semantics = GridBlindDataSemantic::Unknown, const char *name = "", - const BufferT &pool = BufferT()) + const BufferT &pool = BufferT(), + cudaStream_t stream = 0) { // In: |-----------|--------- |-----------| // old grid old meta old data @@ -45,10 +60,10 @@ cudaAddBlindData(const NanoGrid *d_grid, static_assert(BufferTraits::hasDeviceDual, "Expected BufferT to support device allocation"); // extract byte sizes of the grid, blind meta data and blind data - enum {GRID=0, META=1, DATA=2}; - uint64_t tmp[3], *d_tmp; - cudaCheck(cudaMalloc((void**)&d_tmp, 3*sizeof(uint64_t))); - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t) { + enum 
{GRID=0, META=1, DATA=2, CHECKSUM=3}; + uint64_t tmp[4], *d_tmp; + cudaCheck(cudaMallocAsync((void**)&d_tmp, 4*sizeof(uint64_t), stream)); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { if (auto count = d_grid->blindDataCount()) { d_tmp[GRID] = PtrDiff(&d_grid->blindMetaData(0), d_grid); d_tmp[META] = count*sizeof(GridBlindMetaData); @@ -57,47 +72,52 @@ cudaAddBlindData(const NanoGrid *d_grid, d_tmp[GRID] = d_grid->gridSize(); d_tmp[META] = d_tmp[DATA] = 0u; } + d_tmp[CHECKSUM] = d_grid->checksum(); }); cudaCheckError(); - cudaCheck(cudaMemcpy(&tmp, d_tmp, 3*sizeof(uint64_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaFree(d_tmp)); + cudaCheck(cudaMemcpyAsync(&tmp, d_tmp, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); GridBlindMetaData metaData{int64_t(sizeof(GridBlindMetaData) + tmp[DATA]), valueCount, sizeof(BlindDataT), semantics, blindClass, mapToGridType()}; + if (!metaData.isValid()) throw std::runtime_error("cudaAddBlindData: invalid combination of blind meta data"); std::strcpy(metaData.mName, name); auto buffer = BufferT::create(tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + metaData.blindDataSize(), &pool, false); auto d_data = buffer.deviceData(); // 1: |-----------|----------| // old grid old meta - cudaCheck(cudaMemcpy(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice)); + cudaCheck(cudaMemcpyAsync(d_data, d_grid, tmp[GRID] + tmp[META], cudaMemcpyDeviceToDevice, stream)); // 2: |-----------|----------|----------| // old grid old meta new meta - cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META], &metaData, sizeof(GridBlindMetaData), cudaMemcpyHostToDevice, stream)); // 3: |-----------|----------|----------|-----------| // old grid old meta new meta old data - cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), - (const char*)d_grid + tmp[GRID] 
+ tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice)); + cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData), + (const char*)d_grid + tmp[GRID] + tmp[META], tmp[DATA], cudaMemcpyDeviceToDevice, stream)); // 4: |-----------|----------|----------|-----------|------------| // old grid old meta new meta old data new data const size_t dataSize = valueCount*sizeof(BlindDataT);// no padding - cudaCheck(cudaMemcpy(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], - d_blindData, dataSize, cudaMemcpyDeviceToDevice)); + cudaCheck(cudaMemcpyAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA], + d_blindData, dataSize, cudaMemcpyDeviceToDevice, stream)); if (auto padding = metaData.blindDataSize() - dataSize) {// zero out possible padding - cudaCheck(cudaMemset(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding)); + cudaCheck(cudaMemsetAsync(d_data + tmp[GRID] + tmp[META] + sizeof(GridBlindMetaData) + tmp[DATA] + dataSize, 0, padding, stream)); } // increment grid size and blind data counter in output grid - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t) { + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { auto &grid = *reinterpret_cast*>(d_data); grid.mBlindMetadataCount += 1; - grid.mBlindMetadataOffset = tmp[GRID];// this is undefined if input grid has no blind data + grid.mBlindMetadataOffset = d_tmp[GRID]; auto *meta = PtrAdd(d_data, grid.mBlindMetadataOffset);// points to first blind meta data for (uint32_t i=0, n=grid.mBlindMetadataCount-1; imDataOffset += sizeof(GridBlindMetaData); grid.mGridSize += sizeof(GridBlindMetaData) + meta->blindDataSize();// expansion with 32 byte alignment }); cudaCheckError(); + cudaCheck(cudaFreeAsync(d_tmp, stream)); + + GridChecksum cs(tmp[CHECKSUM]); + cudaGridChecksum(reinterpret_cast(d_data), cs.mode()); return GridHandle(std::move(buffer)); }// cudaAddBlindData diff --git 
a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h index 7674f84a12..4b9820771d 100644 --- a/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h +++ b/nanovdb/nanovdb/util/cuda/CudaDeviceBuffer.h @@ -8,7 +8,10 @@ \date January 8, 2020 - \brief Implements a simple dual (host/device) CUDA buffer + \brief Implements a simple dual (host/device) CUDA buffer. + + \note This file has no device-only (kernel) function calls, + which explains why it's a .h and not .cuh file. */ #ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED @@ -28,6 +31,7 @@ namespace nanovdb { /// it is significantly slower then cached (un-pinned) memory on the host. class CudaDeviceBuffer { + uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) uint8_t *mCpuData, *mGpuData; // raw pointers to the host and device buffers @@ -36,18 +40,20 @@ class CudaDeviceBuffer /// @param size byte size of buffer to be initialized /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) /// @return An instance of this class using move semantics - static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* dummy = nullptr, bool host = true); + static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* dummy = nullptr, bool host = true, void* stream = nullptr); /// @brief Constructor /// @param size byte size of buffer to be initialized /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU - CudaDeviceBuffer(uint64_t size = 0, bool host = true) + /// @param stream optional stream argument (defaults to stream NULL) + CudaDeviceBuffer(uint64_t size = 0, bool host = true, void* stream = nullptr) : mSize(0) , mCpuData(nullptr) , mGpuData(nullptr) { - if (size > 0) 
this->init(size, host); + if (size > 0) this->init(size, host, stream); } /// @brief Disallow copy-construction @@ -88,7 +94,7 @@ class CudaDeviceBuffer /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU /// @note All existing buffers are first cleared /// @warning size is expected to be non-zero. Use clear() clear buffer! - void init(uint64_t size, bool host = true); + void init(uint64_t size, bool host = true, void* stream = nullptr); /// @brief Retuns a raw pointer to the host/CPU buffer managed by this allocator. /// @warning Note that the pointer can be NULL! @@ -122,7 +128,7 @@ class CudaDeviceBuffer //@} /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL - void clear(); + void clear(void* stream = nullptr); }; // CudaDeviceBuffer class @@ -134,20 +140,20 @@ struct BufferTraits // --------------------------> Implementations below <------------------------------------ -inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*, bool host) +inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*, bool host, void* stream) { - return CudaDeviceBuffer(size, host); + return CudaDeviceBuffer(size, host, stream); } -inline void CudaDeviceBuffer::init(uint64_t size, bool host) +inline void CudaDeviceBuffer::init(uint64_t size, bool host, void* stream) { - if (mSize>0) this->clear(); + if (mSize>0) this->clear(stream); NANOVDB_ASSERT(size > 0); if (host) { cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned checkPtr(mCpuData, "CudaDeviceBuffer::init: failed to allocate host buffer"); } else { - cudaCheck(cudaMalloc((void**)&mGpuData, size)); // un-managed memory on the device, always 32B aligned! + cudaCheck(cudaMallocAsync((void**)&mGpuData, size, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! 
checkPtr(mGpuData, "CudaDeviceBuffer::init: failed to allocate device buffer"); } mSize = size; @@ -157,7 +163,7 @@ inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const { checkPtr(mCpuData, "uninitialized cpu data"); if (mGpuData == nullptr) { - cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device, always 32B aligned! + cudaCheck(cudaMallocAsync((void**)&mGpuData, mSize, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! } checkPtr(mGpuData, "uninitialized gpu data"); cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); @@ -175,9 +181,9 @@ inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); } // CudaDeviceBuffer::gpuDownload -inline void CudaDeviceBuffer::clear() +inline void CudaDeviceBuffer::clear(void *stream) { - if (mGpuData) cudaCheck(cudaFree(mGpuData)); + if (mGpuData) cudaCheck(cudaFreeAsync(mGpuData, reinterpret_cast(stream))); if (mCpuData) cudaCheck(cudaFreeHost(mCpuData)); mCpuData = mGpuData = nullptr; mSize = 0; diff --git a/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh b/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh new file mode 100644 index 0000000000..e3ae9a941f --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaGridChecksum.cuh @@ -0,0 +1,244 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! 
+ \file CudaGridChecksum.cuh + + \author Ken Museth + + \date September 28, 2023 + + \brief Compute CRC32 checksum of NanoVDB grids + +*/ + +#ifndef NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED + +#include "CudaDeviceBuffer.h"// required for instantiation of move c-tor of GridHandle +#include "CudaNodeManager.cuh" +#include "../GridChecksum.h"// for +#include "../GridHandle.h" + +namespace nanovdb { + +namespace crc32 { + +/// @bried Cuda kernel to initiate lookup table for CRC32 computation +/// @tparam T Dummy template parameter used to avoid multiple instantiations. T should be uint32_t! +/// @param d_lut Device pointer to lookup table of size 256 +template +__global__ void initLutKernel(T *d_lut) +{ + static_assert(is_same::value,"Expected uint32_t"); + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < 256u) crc32::initLut(d_lut, tid); +} + +/// @brief Cuda kernel that computes CRC32 checksums of blocks of data using a look-up-table +/// @param d_data device pointer to raw data from wich to compute the CRC32 checksums +/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block +/// @param blockCount number of blocks and checksums +/// @param blockSize size of each block in bytes +/// @param d_lut device pointer to CRC32 Lookup Table +template +__global__ void checksumKernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize, const uint32_t *d_lut) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < blockCount) d_blockCRC[tid] = crc32::checksum((const uint8_t*)d_data + tid * blockSize, blockSize, d_lut); +} + +/// @brief Cuda kernel that computes CRC32 checksums of blocks of data (without using a look-up-table) +/// @param d_data device pointer to raw data from wich to compute the CRC32 checksums +/// @param d_blockCRC device pointer to array of @c blockCount checksums for each block +/// @param 
blockCount number of blocks and checksums +/// @param blockSize size of each block in bytes +template +__global__ void checksumKernel(const T *d_data, uint32_t* d_blockCRC, uint32_t blockCount, uint32_t blockSize) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < blockCount) d_blockCRC[tid] = crc32::checksum((const uint8_t*)d_data + tid * blockSize, blockSize); +} + +/// @brief Host function to allocate and initiate a Look-Up-Table of size 256 for subsequent CRC32 computation on the device +/// @param stream optional cuda stream (defaults to zero) +/// @return returns a device point to a lookup-table for CRC32 computation +/// @warning It is the responsibility of the caller to delete the returned array +inline uint32_t* cudaCreateLut(cudaStream_t stream = 0) +{ + uint32_t *d_lut; + cudaCheck(cudaMallocAsync((void**)&d_lut, 256*sizeof(uint32_t), stream)); + initLutKernel<<<1, 256, 0, stream>>>(d_lut); + cudaCheckError(); + return d_lut; +} + +}// namespace crc + +#ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE// new approach computes CRC32 checksums for each 4 KB block + +/// @brief Update the checksum of a device grid +/// @param d_gridData device pointer to GridData +/// @param mode Mode of computation for the checksum. +/// @param stream optional cuda stream (defaults to zero) +/// @return The actual mode used for checksum computation. Eg. if @c d_gridData is NULL (or @c mode = ChecksumMode::Disable) +/// then ChecksumMode::Disable is always returned. Elseif the grid has no nodes or blind data ChecksumMode::Partial +/// is always returnd (even if @c mode = ChecksumMode::Full). +inline ChecksumMode cudaGridChecksum(GridData *d_gridData, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0) +{ + if (d_gridData == nullptr || mode == ChecksumMode::Disable) return ChecksumMode::Disable; + + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
+ auto numBlocks = [&](unsigned int n)->unsigned int{return (n + mNumThreads - 1) / mNumThreads;}; + uint8_t *d_begin = reinterpret_cast(d_gridData); + uint32_t *d_lut = crc32::cudaCreateLut(stream);// allocate and generate device LUT for CRC32 + uint64_t size[2], *d_size;// {total size of grid, partial size for first checksum} + cudaCheck(cudaMallocAsync((void**)&d_size, 2*sizeof(uint64_t), stream)); + + // Compute CRC32 checksum of GridData, TreeData, RootData (+tiles), but exclude GridData::mMagic and GridData::mChecksum + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + d_size[0] = d_gridData->mGridSize; + uint8_t *d_mid = d_gridData->template nodePtr<2>(); + if (d_mid == nullptr) {// no upper nodes + if (d_gridData->mBlindMetadataCount) { + d_mid = d_begin + d_gridData->mBlindMetadataOffset;// exclude blind data from partial checksum + } else { + d_mid = d_begin + d_gridData->mGridSize;// no nodes or blind data, so partial checksum is computed on the entire grid buffer + } + } + d_size[1] = d_mid - d_begin; + uint32_t *p = reinterpret_cast(&(d_gridData->mChecksum)); + p[0] = crc32::checksum(d_begin + 16u, d_mid, d_lut);// exclude GridData::mMagic and GridData::mChecksum + }); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(size, d_size, 2*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaFreeAsync(d_size, stream)); + + if (mode != ChecksumMode::Full || size[0] == size[1]) return ChecksumMode::Partial; + + // Compute CRC32 checksum of 4K block of everything remaining in the buffer, i.e. 
nodes and blind data + const uint8_t *d_mid = d_begin + size[1], *d_end = d_begin + size[0]; + uint32_t *d_checksums;// 4096 byte chunks + const uint64_t checksumCount = (d_end - d_mid) >> NANOVDB_CRC32_LOG2_BLOCK_SIZE;// 4 KB (4096 byte) + cudaCheck(cudaMallocAsync((void**)&d_checksums, checksumCount*sizeof(uint32_t), stream)); + cudaLambdaKernel<<>>(checksumCount, [=] __device__(size_t tid) { + uint32_t size = 1<>>(1, [=] __device__(size_t) { + uint32_t *p = reinterpret_cast(&(d_gridData->mChecksum)); + p[1] = crc32::checksum((const uint8_t*)d_checksums, checksumCount*sizeof(uint32_t), d_lut); + }); + cudaCheckError(); + cudaCheck(cudaFreeAsync(d_checksums, stream)); + cudaCheck(cudaFreeAsync(d_lut, stream)); + + return ChecksumMode::Full; +}// cudaGridChecksum + +template +inline ChecksumMode cudaGridChecksum(NanoGrid *d_grid, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0) +{ + return cudaGridChecksum(reinterpret_cast(d_grid), mode, stream); +} + +inline GridChecksum cudaGetGridChecksum(GridData *d_gridData, cudaStream_t stream = 0) +{ + uint64_t checksum, *d_checksum; + cudaCheck(cudaMallocAsync((void**)&d_checksum, sizeof(uint64_t), stream)); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) {*d_checksum = d_gridData->mChecksum;}); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&checksum, d_checksum, sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaFreeAsync(d_checksum, stream)); + return GridChecksum(checksum);; +} + +inline ChecksumMode cudaUpdateGridChecksum(GridData *d_gridData, cudaStream_t stream = 0) +{ + return cudaGridChecksum(d_gridData, cudaGetGridChecksum(d_gridData, stream).mode(), stream); +} + +#else + +template +void cudaGridChecksum(NanoGrid *d_grid, ChecksumMode mode = ChecksumMode::Partial, cudaStream_t stream = 0) +{ + if (d_grid == nullptr || mode == ChecksumMode::Disable) return; + + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
+ auto numBlocks = [&](unsigned int n)->unsigned int{return (n + mNumThreads - 1) / mNumThreads;}; + + uint32_t *d_lut = crc32::cudaCreateLut(stream);// allocate and generate device LUT for CRC32 + uint64_t size[2], *d_size; + cudaCheck(cudaMallocAsync((void**)&d_size, 2*sizeof(uint64_t), stream)); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + d_size[0] = d_grid->gridSize(); + d_size[1] = d_grid->memUsage() + d_grid->tree().memUsage() + d_grid->tree().root().memUsage(); + const uint8_t *begin = reinterpret_cast(d_grid); + uint32_t *p = reinterpret_cast(&(d_grid->mChecksum)); + p[0] = crc32::checksum(begin + 16u, begin + d_size[1], d_lut);// exclude mMagic and mChecksum + }); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(size, d_size, 2*sizeof(uint64_t), cudaMemcpyDeviceToHost, stream)); + cudaCheckError(); + + if (mode != ChecksumMode::Full) return; + + // Get node counts + uint32_t nodeCount[3], *d_nodeCount, *d_checksums, *d_ptr; + cudaCheck(cudaMallocAsync((void**)&d_nodeCount, 3*sizeof(uint32_t), stream)); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + auto &tree = d_grid->tree(); + for (int i = 0; i < 3; ++i) d_nodeCount[i] = tree.nodeCount(i); + }); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(nodeCount, d_nodeCount, 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaFreeAsync(d_nodeCount, stream)); + cudaCheck(cudaMallocAsync((void**)&d_checksums, (nodeCount[0]+nodeCount[1]+nodeCount[2])*sizeof(uint32_t), stream)); + + auto nodeMgrHandle = cudaCreateNodeManager(d_grid, CudaDeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + NANOVDB_ASSERT(isValid(d_nodeMgr)); + d_ptr = d_checksums; + + // very slow due to large nodes + cudaLambdaKernel<<>>(nodeCount[2], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->upper(uint32_t(tid)); + d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); + }); + cudaCheckError(); + + d_ptr += nodeCount[2]; 
+ cudaLambdaKernel<<>>(nodeCount[1], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->lower(uint32_t(tid)); + d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); + }); + cudaCheckError(); + + d_ptr += nodeCount[1]; + cudaLambdaKernel<<>>(nodeCount[0], [=] __device__(size_t tid) { + auto &node = d_nodeMgr->leaf(uint32_t(tid)); + d_ptr[tid] = crc32::checksum((const uint8_t*)&node, node.memUsage(), d_lut); + }); + cudaCheckError(); + + // to-do: process blind data + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { + uint32_t *p = reinterpret_cast(&(d_grid->mChecksum)); + const uint8_t *begin = reinterpret_cast(d_checksums); + p[1] = crc32::checksum(begin, d_nodeMgr->tree().totalNodeCount()*sizeof(uint32_t), d_lut); + }); + cudaCheckError(); + + cudaCheck(cudaFreeAsync(d_size, stream)); + cudaCheck(cudaFreeAsync(d_checksums, stream)); + cudaCheck(cudaFreeAsync(d_lut, stream)); +}// cudaGridChecksum + +#endif + +}// namespace nanovdb + +#endif // NANOVDB_CUDA_GRID_CHECKSUM_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh index 5dc3adf2fd..8d449d09b0 100644 --- a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh @@ -18,13 +18,22 @@ #define NANOVDB_CUDA_GRID_HANDLE_CUH_HAS_BEEN_INCLUDED #include "CudaDeviceBuffer.h"// required for instantiation of move c-tor of GridHandle +#include "CudaGridChecksum.cuh"// for cudaUpdateChecksum #include "../GridHandle.h" namespace nanovdb { namespace {// anonymous namespace __global__ void cudaCpyMetaData(const GridData *data, GridHandleMetaData *meta){cpyMetaData(data, meta);} -__global__ void cudaUpdateGridData(GridData *data, uint32_t gridIndex, uint32_t gridCount){updateGridData(data, gridIndex, gridCount);} +__global__ void cudaUpdateGridCount(GridData *data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty){ + NANOVDB_ASSERT(gridIndex < gridCount); + if 
(*d_dirty = data->mGridIndex != gridIndex || data->mGridCount != gridCount) { + data->mGridIndex = gridIndex; + data->mGridCount = gridCount; + if (data->mChecksum == GridChecksum::EMPTY) *d_dirty = false;// no need to update checksum if it didn't already exist + //data->mChecksum = GridChecksum::EMPTY;// disable the checksum (in the future this should call cudaGridChecksum) + } +} }// anonymous namespace template @@ -57,32 +66,38 @@ namespace {auto __dummy(){return GridHandle(std::move(CudaDevi template class VectorT = std::vector> inline typename enable_if::hasDeviceDual, VectorT>>::type -cudaSplitGridHandles(const GridHandle &handle, const BufferT* other = nullptr) +cudaSplitGridHandles(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) { const uint8_t *ptr = handle.deviceData(); if (ptr == nullptr) return VectorT>(); VectorT> handles(handle.gridCount()); + bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed + cudaCheck(cudaMallocAsync((void**)&d_dirty, sizeof(bool), stream)); for (uint32_t n=0; n(buffer.deviceData()); const GridData *src = reinterpret_cast(ptr); - cudaCheck(cudaMemcpy(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice)); - cudaUpdateGridData<<<1,1>>>(dst, 0u, 1u); + cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream)); + cudaUpdateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty); cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); + if (dirty) cudaGridChecksum(dst, ChecksumMode::Partial); handles[n] = GridHandle(std::move(buffer)); ptr += handle.gridSize(n); } + cudaCheck(cudaFreeAsync(d_dirty, stream)); + //cudaCheck(cudaFreeAsync(d_lut, stream)); return std::move(handles); }// cudaSplitGridHandles template class VectorT = std::vector> inline typename enable_if::hasDeviceDual, VectorT>>::type -splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr) -{ return 
cudaSplitGridHandles(handle, other); } +splitDeviceGrids(const GridHandle &handle, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ return cudaSplitGridHandles(handle, other, stream); } template class VectorT> inline typename enable_if::hasDeviceDual, GridHandle>::type -cudaMergeGridHandles(const VectorT> &handles, const BufferT* other = nullptr) +cudaMergeGridHandles(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) { uint64_t size = 0u; uint32_t counter = 0u, gridCount = 0u; @@ -90,26 +105,31 @@ cudaMergeGridHandles(const VectorT> &handles, const BufferT* gridCount += h.gridCount(); for (uint32_t n=0; n(dst); - cudaUpdateGridData<<<1,1>>>(data, counter++, gridCount); + cudaUpdateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty); cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream)); + if (dirty) cudaGridChecksum(data, ChecksumMode::Partial); dst += h.gridSize(n); src += h.gridSize(n); } } + cudaCheck(cudaFreeAsync(d_dirty, stream)); return GridHandle(std::move(buffer)); }// cudaMergeGridHandles template class VectorT> inline typename enable_if::hasDeviceDual, GridHandle>::type -mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr) -{ return cudaMergeGridHandles(handles, other); } +mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) +{ return cudaMergeGridHandles(handles, other, stream); } } // namespace nanovdb diff --git a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh index 9d53dca4ca..8394ecefe1 100644 --- a/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaIndexToGrid.cuh @@ -20,7 +20,7 @@ #include #include "CudaDeviceBuffer.h" #include -#include +#include #include namespace nanovdb { @@ -32,6 +32,7 @@ namespace nanovdb { /// @param d_srcGrid Device pointer to source/input IndexGrid, i.e. 
SrcBuildT={ValueIndex,ValueOnIndex,ValueIndexMask,ValueOnIndexMask} /// @param d_srcValues Device pointer to an array of values /// @param pool Memory pool used to create a buffer for the destination/output Grid +/// @param stream optional CUDA stream (defaults to CUDA stream 0 /// @note If d_srcGrid has stats (min,max,avg,std-div), the d_srcValues is also assumed /// to have the same information, all of which are then copied to the destination/output grid. /// An exception to this rule is if the type of d_srcValues is different from the stats type @@ -40,14 +41,14 @@ namespace nanovdb { /// @return template typename enable_if::is_index, GridHandle>::type -cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT()); +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0); template typename enable_if::is_index, GridHandle>::type -cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT()) +cudaCreateNanoGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool = BufferT(), cudaStream_t stream = 0) { - return cudaIndexToGrid(d_srcGrid, d_srcValues, pool); + return cudaIndexToGrid(d_srcGrid, d_srcValues, pool, stream); } namespace {// anonymous namespace @@ -61,9 +62,9 @@ public: /// @brief Constructor from a source IndeGrid /// @param srcGrid Device pointer to IndexGrid used as the source - CudaIndexToGrid(const SrcGridT *d_srcGrid); + CudaIndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream = 0); - ~CudaIndexToGrid() {cudaCheck(cudaFree(mDevNodeAcc));} + ~CudaIndexToGrid() {cudaCheck(cudaFreeAsync(mDevNodeAcc, mStream));} /// @brief Toggle on and off verbose mode /// @param on if true verbose is turned on @@ -73,10 +74,17 @@ public: /// @param name Name used for the destination grid 
void setGridName(const std::string &name) {mGridName = name;} + /// @brief Combines the IndexGrid with values to produce a regular Grid + /// @tparam DstBuildT Template parameter of the destination grid and value type + /// @tparam BufferT Template parameter of the memory allocator + /// @param srcValues pointer to values that will be inserted into the output grid + /// @param buffer optional buffer used for memory allocation + /// @return A new GridHandle with the grid of type @c DstBuildT template GridHandle getHandle(const typename BuildToValueMap::type *srcValues, const BufferT &buffer = BufferT()); private: + cudaStream_t mStream{0}; GpuTimer mTimer; std::string mGridName; bool mVerbose{false}; @@ -137,6 +145,7 @@ __global__ void cudaProcessGridTreeRoot(typename CudaIndexToGrid::Nod *dstGrid.data() = *srcGrid.data(); dstGrid.mGridType = mapToGridType(); dstGrid.mData1 = 0u; + // we will recompute GridData::mChecksum later // process Tree *dstTree.data() = *srcTree.data(); @@ -282,13 +291,14 @@ __global__ void cudaCpyNodeCount(const NanoGrid *srcGrid, //================================================================================================ template -CudaIndexToGrid::CudaIndexToGrid(const SrcGridT *d_srcGrid) +CudaIndexToGrid::CudaIndexToGrid(const SrcGridT *d_srcGrid, cudaStream_t stream) + : mStream(stream), mTimer(stream) { NANOVDB_ASSERT(d_srcGrid); - cudaCheck(cudaMalloc((void**)&mDevNodeAcc, sizeof(NodeAccessor))); - cudaCpyNodeCount<<<1,1>>>(d_srcGrid, mDevNodeAcc); + cudaCheck(cudaMallocAsync((void**)&mDevNodeAcc, sizeof(NodeAccessor), mStream)); + cudaCpyNodeCount<<<1, 1, 0, mStream>>>(d_srcGrid, mDevNodeAcc); cudaCheckError(); - cudaCheck(cudaMemcpy(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost));// mNodeAcc = *mDevNodeAcc + cudaCheck(cudaMemcpyAsync(&mNodeAcc, mDevNodeAcc, sizeof(NodeAccessor), cudaMemcpyDeviceToHost, mStream));// mNodeAcc = *mDevNodeAcc } 
//================================================================================================ @@ -296,34 +306,39 @@ CudaIndexToGrid::CudaIndexToGrid(const SrcGridT *d_srcGrid) template template GridHandle CudaIndexToGrid::getHandle(const typename BuildToValueMap::type *srcValues, - const BufferT &pool) + const BufferT &pool) { if (mVerbose) mTimer.start("Initiate buffer"); auto buffer = this->template getBuffer(pool); if (mVerbose) mTimer.restart("Process grid,tree,root"); - cudaProcessGridTreeRoot<<<1, 1>>>(mDevNodeAcc, srcValues); + cudaProcessGridTreeRoot<<<1, 1, 0, mStream>>>(mDevNodeAcc, srcValues); cudaCheckError(); if (mVerbose) mTimer.restart("Process root children and tiles"); - cudaProcessRootTiles<<>>(mDevNodeAcc, srcValues); + cudaProcessRootTiles<<>>(mDevNodeAcc, srcValues); cudaCheckError(); - cudaCheck(cudaFree(mNodeAcc.d_gridName)); + cudaCheck(cudaFreeAsync(mNodeAcc.d_gridName, mStream)); if (mVerbose) mTimer.restart("Process upper internal nodes"); - cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); + cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); cudaCheckError(); if (mVerbose) mTimer.restart("Process lower internal nodes"); - cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); + cudaProcessInternalNodes<<>>(mDevNodeAcc, srcValues); cudaCheckError(); if (mVerbose) mTimer.restart("Process leaf nodes"); - cudaProcessLeafNodes<<>>(mDevNodeAcc, srcValues); + cudaProcessLeafNodes<<>>(mDevNodeAcc, srcValues); if (mVerbose) mTimer.stop(); cudaCheckError(); + if (mVerbose) mTimer.restart("Compute checksums"); + cudaUpdateGridChecksum((GridData*)mNodeAcc.d_dstPtr, mStream); + if (mVerbose) mTimer.stop(); + + cudaStreamSynchronize(mStream);// finish all device tasks in mStream return GridHandle(std::move(buffer)); }// CudaIndexToGrid::getHandle @@ -342,17 +357,17 @@ inline BufferT CudaIndexToGrid::getBuffer(const BufferT &pool) mNodeAcc.meta = mNodeAcc.node[0] + NanoLeaf::DataType::memUsage()*mNodeAcc.nodeCount[0];// leaf nodes end and 
blind meta data begins mNodeAcc.blind = mNodeAcc.meta + 0*sizeof(GridBlindMetaData); // meta data ends and blind data begins mNodeAcc.size = mNodeAcc.blind;// end of buffer - auto buffer = BufferT::create(mNodeAcc.size, &pool, false); + auto buffer = BufferT::create(mNodeAcc.size, &pool, false, mStream); mNodeAcc.d_dstPtr = buffer.deviceData(); if (mNodeAcc.d_dstPtr == nullptr) throw std::runtime_error("Failed memory allocation on the device"); if (size_t size = mGridName.size()) { - cudaCheck(cudaMalloc((void**)&mNodeAcc.d_gridName, size)); - cudaCheck(cudaMemcpy(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice)); + cudaCheck(cudaMallocAsync((void**)&mNodeAcc.d_gridName, size, mStream)); + cudaCheck(cudaMemcpyAsync(mNodeAcc.d_gridName, mGridName.data(), size, cudaMemcpyHostToDevice, mStream)); } else { mNodeAcc.d_gridName = nullptr; } - cudaCheck(cudaMemcpy(mDevNodeAcc, &mNodeAcc, sizeof(NodeAccessor), cudaMemcpyHostToDevice));// copy NodeAccessor CPU -> GPU + cudaCheck(cudaMemcpyAsync(mDevNodeAcc, &mNodeAcc, sizeof(NodeAccessor), cudaMemcpyHostToDevice, mStream));// copy NodeAccessor CPU -> GPU return buffer; } @@ -360,10 +375,10 @@ inline BufferT CudaIndexToGrid::getBuffer(const BufferT &pool) template typename enable_if::is_index, GridHandle>::type -cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool) +cudaIndexToGrid(const NanoGrid *d_srcGrid, const typename BuildToValueMap::type *d_srcValues, const BufferT &pool, cudaStream_t stream) { - CudaIndexToGrid converter(d_srcGrid); - return converter.template getHandle(d_srcValues, pool); + CudaIndexToGrid converter(d_srcGrid, stream); + return converter.template getHandle(d_srcValues, pool); } }// nanovdb namespace diff --git a/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh new file mode 100644 index 0000000000..a69d86be20 --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh 
@@ -0,0 +1,90 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CudaNodeManager.cuh + + \author Ken Museth + + \date October 3, 2023 + + \brief Contains cuda kernels for NodeManager + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED + +#include "CudaUtils.h"// for cudaLambdaKernel +#include "CudaDeviceBuffer.h" +#include "../NodeManager.h" + +namespace nanovdb { + +/// @brief Construct a NodeManager from a device grid pointer +/// +/// @param d_grid device grid pointer whose nodes will be accessed sequentially +/// @param buffer buffer from which to allocate the output handle +/// @param stream cuda stream +/// @return Handle that contains a device NodeManager +template +inline typename enable_if::hasDeviceDual, NodeManagerHandle>::type +cudaCreateNodeManager(const NanoGrid *d_grid, + const BufferT& pool = BufferT(), + cudaStream_t stream = 0) +{ + auto buffer = BufferT::create(sizeof(NodeManagerData), &pool, false, stream); + auto *d_data = (NodeManagerData*)buffer.deviceData(); + size_t size = 0u, *d_size; + cudaCheck(cudaMallocAsync((void**)&d_size, sizeof(size_t), stream)); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__(size_t) { +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + *d_data = NodeManagerData{NANOVDB_MAGIC_NODE, 0u, (void*)d_grid, {0u,0u,0u}}; +#else + *d_data = NodeManagerData{NANOVDB_MAGIC_NUMBER, 0u, (void*)d_grid, {0u,0u,0u}}; +#endif + *d_size = sizeof(NodeManagerData); + auto &tree = d_grid->tree(); + if (NodeManager::FIXED_SIZE && d_grid->isBreadthFirst()) { + d_data->mLinear = uint8_t(1u); + d_data->mOff[0] = PtrDiff(tree.template getFirstNode<0>(), d_grid); + d_data->mOff[1] = PtrDiff(tree.template getFirstNode<1>(), d_grid); + d_data->mOff[2] = PtrDiff(tree.template getFirstNode<2>(), d_grid); + } 
else { + *d_size += sizeof(uint64_t)*tree.totalNodeCount(); + } + }); + cudaCheckError(); + cudaCheck(cudaMemcpyAsync(&size, d_size, sizeof(size_t), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaFreeAsync(d_size, stream)); + if (size > sizeof(NodeManagerData)) { + auto tmp = BufferT::create(size, &pool, false, stream);// only allocate buffer on the device + cudaCheck(cudaMemcpyAsync(tmp.deviceData(), buffer.deviceData(), sizeof(NodeManagerData), cudaMemcpyDeviceToDevice, stream)); + buffer = std::move(tmp); + d_data = reinterpret_cast(buffer.deviceData()); + cudaLambdaKernel<<<1, 1, 0, stream>>>(1, [=] __device__ (size_t) { + auto &tree = d_grid->tree(); + int64_t *ptr0 = d_data->mPtr[0] = reinterpret_cast(d_data + 1); + int64_t *ptr1 = d_data->mPtr[1] = d_data->mPtr[0] + tree.nodeCount(0); + int64_t *ptr2 = d_data->mPtr[2] = d_data->mPtr[1] + tree.nodeCount(1); + // Performs depth first traversal but breadth first insertion + for (auto it2 = tree.root().cbeginChild(); it2; ++it2) { + *ptr2++ = PtrDiff(&*it2, d_grid); + for (auto it1 = it2->beginChild(); it1; ++it1) { + *ptr1++ = PtrDiff(&*it1, d_grid); + for (auto it0 = it1->beginChild(); it0; ++it0) { + *ptr0++ = PtrDiff(&*it0, d_grid); + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + }); + } + + return NodeManagerHandle(mapToGridType(), std::move(buffer)); +}// cudaCreateNodeManager + +} // namespace nanovdb + +#endif // NANOVDB_CUDA_NODE_MANAGER_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index ad3524775c..8da1dc3e1d 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -21,11 +21,12 @@ #include #include "CudaDeviceBuffer.h" #include -#include +#include +#include #include /* -Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 
48 GB! + Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! */ namespace nanovdb { @@ -45,32 +46,77 @@ enum class PointType : uint32_t { Disable = 0,// no point information e.g. when //================================================================================================ +/// @brief Example class of a fancy pointer that can optionally be used as a template for writing +/// a custom fancy pointer that allows for particle coordinates to be arrange non-linearly +/// in memory. For instance with coordinates are interlaced with other dats, i.e. an array +/// of structs, a custom implementation of fancy_ptr::operator[](size_t i) can account for +/// strides that skip other interlaces data. +/// @tparam T Template type that specifies the type use for the coordinates of the points +template +class fancy_ptr +{ + const T* mPtr; +public: + __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {} + __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];}// required by CudaPointsToGrid + __hostdev__ inline const T& operator*() const {return *mPtr;}// required by pointer_traits +};// fancy_ptr + +/// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr +/// @tparam T Template type that specifies the type use for the coordinates of the points +/// @param ptr Raw pointer to data +/// @return a new instance of a fancy_ptr +template +fancy_ptr make_fancy(const T* ptr = nullptr) {return fancy_ptr(ptr);} + +/// @brief Trait of points, like type of pointer and size of the pointer type +template +struct pointer_traits; + +template +struct pointer_traits { + using element_type = T; + static constexpr size_t element_size = sizeof(T); +}; + +template +struct pointer_traits { + using element_type = typename remove_reference())>::type;// assumes T::operator*() exists! 
+ static constexpr size_t element_size = sizeof(element_type); +}; + +//================================================================================================ + /// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is /// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. -/// @tparam Vec3T Template type of the list of coordinates of points in world space. Should be Vec3f or Vec3d. +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. /// @tparam BufferT Template type of buffer used for memory allocation on the device -/// @tparam AllocT Template type of optional device allocator for internal tempoary memory -/// @param d_world List of coordinates of points in world space on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device /// @param pointCount number of point in the list @c d_world /// @param voxelSize Size of a voxel in world units used for the output grid /// @param type Defined the way point information is represented in the output grid (see PointType enum above) /// Should not be PointType::Disable! /// @param buffer Instance of the device buffer used for memory allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) /// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, /// are represented as blind data defined by @c type. 
-template +template GridHandle -cudaPointsToGrid(const Vec3T* d_world, +cudaPointsToGrid(const PtrT dWorldPoints, int pointCount, double voxelSize = 1.0, PointType type = PointType::Default, - BufferT &buffer = BufferT()); + BufferT &buffer = BufferT(), + cudaStream_t stream = 0); //================================================================================================ -template +template GridHandle -cudaPointsToGrid(std::vector> pointSet, const BufferT &buffer = BufferT()); +cudaPointsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); //================================================================================================ @@ -78,39 +124,29 @@ cudaPointsToGrid(std::vector> p /// this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a /// means to generate a grid that is know to contain the voxels given in the list. /// @tparam BuildT Template type of the return grid -/// @tparam Vec3T Template type of the list of coordinates of voxels in grid (or index) space. Should be Coord, Vec3f or Vec3f. +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. 
/// @tparam BufferT Template type of buffer used for memory allocation on the device -/// @tparam AllocT Template type of optional device allocator for internal tempoary memory -/// @param d_voxels List of coordinates of voxels in grid (or index) space on the device -/// @param pointCount number of voxel in the list @c d_voxels +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dGridVoxels Raw or fancy pointer to list of voxel coordinates in grid (or index) space on the device +/// @param pointCount number of voxel in the list @c dGridVoxels /// @param voxelSize Size of a voxel in world units used for the output grid /// @param buffer Instance of the device buffer used for memory allocation /// @return Returns a handle with the grid of type NanoGrid -template +template GridHandle -cudaVoxelsToGrid(const Vec3T* d_voxels, - int pointCount, +cudaVoxelsToGrid(const PtrT dGridVoxels, + size_t voxelCount, double voxelSize = 1.0, - const BufferT &buffer = BufferT()); - + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); //================================================================================================ -template +template GridHandle -cudaVoxelsToGrid(std::vector> pointSet, const BufferT &buffer = BufferT()); - -//================================================================================================ - -#if 0 -// the following function is only included for backwards compatability -template -typename enable_if::is_index, GridHandle>::type -cudaCreateNanoGrid(const Vec3T* d_ijk, size_t pointCount, double voxelSize = 1.0, const BufferT &buffer = BufferT()) -{ - return cudaVoxelsToGrid(d_ijk, pointCount, voxelSize, buffer); -} -#endif +cudaVoxelsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0); //================================================================================================ @@ -208,24 +244,34 @@ public: /// @brief Constructor 
from a Map /// @param map Map to be used for the output device grid - CudaPointsToGrid(const Map &map) : mPointType(is_same::value ? PointType::Default : PointType::Disable){ + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + CudaPointsToGrid(const Map &map, cudaStream_t stream = 0) + : mStream(stream) + , mPointType(is_same::value ? PointType::Default : PointType::Disable) + { mData.map = map; - mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); - cudaCheck(cudaMalloc((void**)&mDeviceData, sizeof(Data))); + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsLexicographic}); + cudaCheck(cudaMallocAsync((void**)&mDeviceData, sizeof(Data), mStream)); } /// @brief Default constructor /// @param scale Voxel size in world units /// @param trans Translation of origin in world units - CudaPointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0)) : CudaPointsToGrid(Map(scale, trans)) {} + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + CudaPointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0), cudaStream_t stream = 0) + : CudaPointsToGrid(Map(scale, trans), stream) {} /// @brief Destructor - ~CudaPointsToGrid() {cudaCheck(cudaFree(mDeviceData));} + ~CudaPointsToGrid() {cudaCheck(cudaFreeAsync(mDeviceData, mStream));} /// @brief Toggle on and off verbose mode /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); } + /// @brief Set the mode for checksum computation, which is disabled by default + /// @param mode Mode of checksum computation + void setChecksum(ChecksumMode mode = ChecksumMode::Disable){mChecksum = mode;} + /// @brief Toggle on and off the computation of a bounding-box /// @param on If true bbox will be computed void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); } @@ -240,32 +286,33 @@ public: /// @brief Creates a handle to a grid with the 
specified build type from a list of points in index or world space /// @tparam BuildT Build type of the output grid, i.e NanoGrid - /// @tparam Vec3T Type of the input points. If Vec3 points are in world space and if Coord in Index space + /// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world or index space. /// @tparam BufferT Buffer type used for allocation of the grid handle - /// @param d_xyz device point to an array of points in world space - /// @param pointCount number of input points + /// @param points device point to an array of points in world space + /// @param pointCount number of input points or voxels /// @param gridName optional name of the output grid /// @param buffer optional buffer (currently ignored) /// @return returns a handle with a grid of type NanoGrid - //template - template - GridHandle getHandle(const Vec3T* d_xyz, size_t pointCount, const BufferT &buffer = BufferT()); + template + GridHandle getHandle(const PtrT points, + size_t pointCount, + const BufferT &buffer = BufferT()); - template - void countNodes(const Vec3T* d_points, size_t pointCount); + template + void countNodes(const PtrT points, size_t pointCount); - template - void processGridTreeRoot(const Vec3T *d_points, size_t pointCount); + template + void processGridTreeRoot(const PtrT points, size_t pointCount); void processUpperNodes(); void processLowerNodes(); - template - void processLeafNodes(const Vec3T *d_points); + template + void processLeafNodes(const PtrT points); - template - void processPoints(const Vec3T *d_points, size_t pointCount); + template + void processPoints(const PtrT points, size_t pointCount); void processBBox(); @@ -279,12 +326,15 @@ private: static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;} - GpuTimer mTimer; - PointType mPointType; - std::string mGridName; - int mVerbose{0}; - Data mData, *mDeviceData; - uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + cudaStream_t mStream{0}; + GpuTimer mTimer; + PointType mPointType; + std::string mGridName; + int mVerbose{0}; + Data mData, *mDeviceData; + uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + ChecksumMode mChecksum{ChecksumMode::Disable}; + // wrapper of cub::CachingDeviceAllocator with a shared scratch space struct Allocator { AllocT mAllocator; @@ -296,7 +346,7 @@ private: mAllocator.FreeAllCached(); } template - T* alloc(size_t count, cudaStream_t stream = 0) { + T* alloc(size_t count, cudaStream_t stream) { T* d_ptr = nullptr; cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream)); return d_ptr; @@ -307,7 +357,7 @@ private: if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr)); this->free(other...); } - void adjustScratch(cudaStream_t stream = 0){ + void adjustScratch(cudaStream_t stream){ if (scratchSize > actualScratchSize) { if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch)); cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream)); @@ -316,8 +366,8 @@ private: } } mMemPool; - template - BufferT getBuffer(const BufferT &buffer, size_t pointCount); + template + BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer); };// CudaPointsToGrid //================================================================================================ @@ -326,14 +376,14 @@ private: #ifndef CALL_CUBS #ifdef _WIN32 #define CALL_CUBS(func, ...) 
\ - cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__)); \ - mMemPool.adjustScratch(); \ - cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__)); + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__, mStream)); #else// fdef _WIN32 #define CALL_CUBS(func, args...) \ - cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args)); \ - mMemPool.adjustScratch(); \ - cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args)); + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args, mStream)); #endif// ifdef _WIN32 #endif// ifndef CALL_CUBS @@ -342,20 +392,20 @@ private: //================================================================================================ template -template +template inline GridHandle -CudaPointsToGrid::getHandle(const Vec3T* d_xyz, - size_t pointCount, - const BufferT &pool) +CudaPointsToGrid::getHandle(const PtrT points, + size_t pointCount, + const BufferT &pool) { if (mVerbose==1) mTimer.start("\nCounting nodes"); - this->countNodes(d_xyz, pointCount); + this->countNodes(points, pointCount); if (mVerbose==1) mTimer.restart("Initiate buffer"); - auto buffer = this->template getBuffer(pool, pointCount); + auto buffer = this->getBuffer(points, pointCount, pool); if (mVerbose==1) mTimer.restart("Process grid,tree,root"); - this->processGridTreeRoot(d_xyz, pointCount); + this->processGridTreeRoot(points, pointCount); if (mVerbose==1) mTimer.restart("Process upper nodes"); this->processUpperNodes(); @@ -364,17 +414,23 @@ CudaPointsToGrid::getHandle(const Vec3T* d_xyz, this->processLowerNodes(); if (mVerbose==1) mTimer.restart("Process leaf nodes"); - this->processLeafNodes(d_xyz); + this->processLeafNodes(points); if (mVerbose==1) 
mTimer.restart("Process points"); - this->processPoints(d_xyz, pointCount); + this->processPoints(points, pointCount); if (mVerbose==1) mTimer.restart("Process bbox"); this->processBBox(); - - cudaDeviceSynchronize();// finish all device tasks if (mVerbose==1) mTimer.stop(); + if (mChecksum != ChecksumMode::Disable) { + if (mVerbose==1) mTimer.restart("Computation of checksum"); + cudaGridChecksum((GridData*)buffer.deviceData(), mChecksum); + if (mVerbose==1) mTimer.stop(); + } + + cudaStreamSynchronize(mStream);// finish all device tasks in mStream + return GridHandle(std::move(buffer)); }// CudaPointsToGrid::getHandle @@ -397,57 +453,58 @@ struct ShiftRightIterator : public cub::TransformInputIterator -template -void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t pointCount) +template +void CudaPointsToGrid::countNodes(const PtrT points, size_t pointCount) { + using Vec3T = typename remove_const::element_type>::type; if constexpr(is_same::value) { static_assert(is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); } else { static_assert(is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); } - mData.d_keys = mMemPool.template alloc(pointCount); - mData.d_indx = mMemPool.template alloc(pointCount);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB - cudaCheck(cudaMemcpy(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice));// copy mData from CPU -> GPU + mData.d_keys = mMemPool.template alloc(pointCount, mStream); + mData.d_indx = mMemPool.template alloc(pointCount, mStream);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy mData from CPU -> GPU if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); - auto *d_keys = mMemPool.template alloc(pointCount); - auto *d_indx = mMemPool.template alloc(pointCount); + auto *d_keys = 
mMemPool.template alloc(pointCount, mStream); + auto *d_indx = mMemPool.template alloc(pointCount, mStream); if (mVerbose==2) mTimer.restart("Generate tile keys"); - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Vec3T *d_points, const Data *d_data) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { d_indx[tid] = uint32_t(tid); uint64_t &key = d_keys[tid]; if constexpr(is_same::value) {// points are in world space if constexpr(is_same::value) { - key = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(d_points[tid]).round()); + key = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(points[tid]).round()); } else {// points are Vec3d - key = NanoRoot::CoordToKey(d_data->map.applyInverseMap(d_points[tid]).round()); + key = NanoRoot::CoordToKey(d_data->map.applyInverseMap(points[tid]).round()); } } else if constexpr(is_same::value) {// points Coord are in index space - key = NanoRoot::CoordToKey(d_points[tid]); + key = NanoRoot::CoordToKey(points[tid]); } else {// points are Vec3f or Vec3d in index space - key = NanoRoot::CoordToKey(d_points[tid].round()); + key = NanoRoot::CoordToKey(points[tid].round()); } - }, d_points, mDeviceData); + }, mDeviceData, points); cudaCheckError(); if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx if (mVerbose==2) mTimer.restart("Allocate runs"); - auto *d_points_per_tile = mMemPool.template alloc(pointCount); - uint32_t *d_node_count = mMemPool.template alloc(3); + auto *d_points_per_tile = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_node_count = mMemPool.template alloc(3, mStream); if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, 
d_points_per_tile, d_node_count+2, pointCount); - cudaCheck(cudaMemcpy(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2]); - cudaCheck(cudaMemcpy(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + cudaCheck(cudaMemcpyAsync(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; - cudaCheck(cudaMemcpy(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); mMemPool.free(d_points_per_tile); auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ @@ -459,9 +516,9 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { const uint32_t count = points_per_tile[id]; - cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { tid += offset; - Vec3T p = d_points[d_indx[tid]]; + Vec3T p = points[d_indx[tid]]; if constexpr(is_same::value) p = is_same::value ? 
d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); d_keys[tid] = voxelKey(id, p.round()); }, mDeviceData); cudaCheckError(); @@ -473,33 +530,33 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t if (mVerbose==2) mTimer.restart("Count points per voxel"); - mData.pointsPerVoxel = mMemPool.template alloc(pointCount); - uint32_t *d_voxel_count = mMemPool.template alloc(1); + mData.pointsPerVoxel = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_voxel_count = mMemPool.template alloc(1, mStream); CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); - cudaCheck(cudaMemcpy(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpyAsync(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); mMemPool.free(d_voxel_count); if constexpr(is_same::value) { if (mVerbose==2) mTimer.restart("Count max points per voxel"); - uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(1); + uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(1, mStream); CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); - cudaCheck(cudaMemcpy(&mMaxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpyAsync(&mMaxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); mMemPool.free(d_maxPointsPerVoxel); } //printf("\n Active voxel count = %u, max points per voxel = %u\n", mData.voxelCount, mMaxPointsPerVoxel); if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); - mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount); + mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount, mStream); CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); - mData.pointsPerLeaf = mMemPool.template alloc(pointCount); + 
mData.pointsPerLeaf = mMemPool.template alloc(pointCount, mStream); CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); - cudaCheck(cudaMemcpy(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpyAsync(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); if constexpr(is_same::value) { - uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(1); + uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(1, mStream); CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); - cudaCheck(cudaMemcpy(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpyAsync(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); if (mMaxPointsPerLeaf > std::numeric_limits::max()) { throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); @@ -507,16 +564,16 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t mMemPool.free(d_maxPointsPerLeaf); } - mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0]); + mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); - mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0]); - cudaCheck(cudaMemcpy(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, 
d_node_count+1, mData.nodeCount[0]);// count lower nodes - cudaCheck(cudaMemcpy(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1]); - cudaCheck(cudaMemcpy(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice)); + cudaCheck(cudaMemcpyAsync(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); mMemPool.free(d_keys, d_node_count); if (mVerbose==2) mTimer.stop(); @@ -527,8 +584,8 @@ void CudaPointsToGrid::countNodes(const Vec3T *d_points, size_t //================================================================================================ template -template -inline BufferT CudaPointsToGrid::getBuffer(const BufferT &pool, size_t pointCount) +template +inline BufferT CudaPointsToGrid::getBuffer(const PtrT, size_t pointCount, const BufferT &pool) { auto sizeofPoint = [&]()->size_t{ switch (mPointType){ @@ -540,7 +597,7 @@ inline BufferT CudaPointsToGrid::getBuffer(const BufferT &pool, case PointType::Voxel32: return sizeof(Vec3f); case PointType::Voxel16: return sizeof(Vec3u16); case PointType::Voxel8: return sizeof(Vec3u8); - case PointType::Default: return sizeof(Vec3T); + case PointType::Default: return pointer_traits::element_size; default: return size_t(0);// PointType::Disable } }; @@ -555,20 +612,21 @@ inline BufferT CudaPointsToGrid::getBuffer(const BufferT &pool, mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer - auto buffer = BufferT::create(mData.size, &pool, false); + auto buffer = BufferT::create(mData.size, &pool, false);// only allocate 
buffer on the device mData.d_bufferPtr = buffer.deviceData(); if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); - cudaCheck(cudaMemcpy(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice));// copy Data CPU -> GPU + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy Data CPU -> GPU return buffer; }// CudaPointsToGrid::getBuffer //================================================================================================ template -template -inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d_points, size_t pointCount) +template +inline void CudaPointsToGrid::processGridTreeRoot(const PtrT points, size_t pointCount) { - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { + using Vec3T = typename remove_const::element_type>::type; + cudaLambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { // process Root auto &root = d_data->getRoot(); root.mBBox = CoordBBox(); // init to empty @@ -590,7 +648,8 @@ inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d // process Grid auto &grid = d_data->getGrid(); - grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, mapToGridType()); + grid.init({GridFlags::HasBBox, GridFlags::IsLexicographic}, d_data->size, d_data->map, mapToGridType()); + grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled grid.mBlindMetadataCount = is_same::value;// ? 
1u : 0u; grid.mBlindMetadataOffset = d_data->meta; if (pointType != PointType::Disable) { @@ -691,9 +750,9 @@ inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d char *dst = mData.getGrid().mGridName; if (const char *src = mGridName.data()) { - cudaCheck(cudaMemcpy(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpyAsync(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice, mStream)); } else { - cudaCheck(cudaMemset(dst, 0, GridData::MaxNameSize)); + cudaCheck(cudaMemsetAsync(dst, 0, GridData::MaxNameSize, mStream)); } }// CudaPointsToGrid::processGridTreeRoot @@ -702,7 +761,7 @@ inline void CudaPointsToGrid::processGridTreeRoot(const Vec3T *d template inline void CudaPointsToGrid::processUpperNodes() { - cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { auto &root = d_data->getRoot(); auto &upper = d_data->getUpper(tid); const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); @@ -719,7 +778,7 @@ inline void CudaPointsToGrid::processUpperNodes() mMemPool.free(mData.d_tile_keys); const uint64_t valueCount = mData.nodeCount[2] << 15; - cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { auto &upper = d_data->getUpper(tid >> 15); upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background }, mDeviceData); @@ -731,7 +790,7 @@ inline void CudaPointsToGrid::processUpperNodes() template inline void CudaPointsToGrid::processLowerNodes() { - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { auto &root = d_data->getRoot(); const uint64_t lowerKey = d_data->d_lower_keys[tid]; auto &upper = d_data->getUpper(lowerKey >> 15); @@ -749,7 +808,7 @@ inline void 
CudaPointsToGrid::processLowerNodes() cudaCheckError(); const uint64_t valueCount = mData.nodeCount[1] << 12; - cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { auto &lower = d_data->getLower(tid >> 12); lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background }, mDeviceData); @@ -759,14 +818,14 @@ inline void CudaPointsToGrid::processLowerNodes() //================================================================================================ template -template -inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_points) +template +inline void CudaPointsToGrid::processLeafNodes(const PtrT points) { const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats if (mVerbose==2) mTimer.start("process leaf meta data"); // loop over leaf nodes and add it to its parent node - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; auto &upper = d_data->getUpper(tile_id); const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; @@ -793,7 +852,7 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po if (mVerbose==2) mTimer.restart("set active voxel state and values"); // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues - cudaLambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; const uint64_t voxelKey = d_data->d_keys[pointID]; auto &upper = d_data->getUpper(voxelKey >> 36); @@ -812,7 +871,7 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po if (mVerbose==2) 
mTimer.restart("set inactive voxel values"); const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; - cudaLambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { auto &leaf = d_data->getLeaf(tid >> 9u); const uint32_t n = tid & 511u; if (leaf.mValueMask.isOn(n)) return; @@ -826,14 +885,14 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po if constexpr(BuildTraits::is_onindex) { if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); - uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0]); - auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0]); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); + auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); }, mDeviceData); cudaCheckError(); CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); mMemPool.free(devValueIndex); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { auto &leaf = d_data->getLeaf(tid); leaf.mOffset = 1u;// will be re-set below const uint64_t *w = leaf.mValueMask.words(); @@ -855,7 +914,7 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po if constexpr(BuildTraits::is_indexmask) { if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { auto &leaf = d_data->getLeaf(tid); leaf.mMask = 
leaf.mValueMask; }, mDeviceData); cudaCheckError(); @@ -866,65 +925,65 @@ inline void CudaPointsToGrid::processLeafNodes(const Vec3T *d_po //================================================================================================ template -template -inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) +template +inline void CudaPointsToGrid::processPoints(const PtrT, size_t) { - mMemPool.free(mData.d_indx); + mMemPool.free(mData.d_indx, mStream); } //================================================================================================ // Template specialization with BuildT = Point template <> -template -inline void CudaPointsToGrid::processPoints(const Vec3T *d_points, size_t pointCount) +template +inline void CudaPointsToGrid::processPoints(const PtrT points, size_t pointCount) { switch (mPointType){ case PointType::Disable: throw std::runtime_error("CudaPointsToGrid::processPoints: mPointType == PointType::Disable\n"); case PointType::PointID: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { d_data->template getPoint(tid) = d_data->d_indx[tid]; }, mDeviceData); cudaCheckError(); break; case PointType::World64: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; }, mDeviceData); cudaCheckError(); break; case PointType::World32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; }, mDeviceData); cudaCheckError(); break; case PointType::Grid64: - 
cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_data->map.applyInverseMap(d_points[d_data->d_indx[tid]]); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMap(points[d_data->d_indx[tid]]); }, mDeviceData); cudaCheckError(); break; case PointType::Grid32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_data->map.applyInverseMapF(d_points[d_data->d_indx[tid]]); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMapF(points[d_data->d_indx[tid]]); }, mDeviceData); cudaCheckError(); break; case PointType::Voxel32: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); }, mDeviceData); cudaCheckError(); break; case PointType::Voxel16: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); }, mDeviceData); cudaCheckError(); break; case PointType::Voxel8: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - worldToVoxel(d_data->template getPoint(tid), d_points[d_data->d_indx[tid]], d_data->map); + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); }, mDeviceData); 
cudaCheckError(); break; case PointType::Default: - cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { - d_data->template getPoint(tid) = d_points[d_data->d_indx[tid]]; + cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint::element_type>(tid) = points[d_data->d_indx[tid]]; }, mDeviceData); cudaCheckError(); break; default: @@ -944,13 +1003,13 @@ inline void CudaPointsToGrid::processBBox() } // reset bbox in lower nodes - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { d_data->getLower(tid).mBBox = CoordBBox(); }, mDeviceData); cudaCheckError(); // update and propagate bbox from leaf -> lower/parent nodes - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { const uint64_t leafKey = d_data->d_leaf_keys[tid]; auto &upper = d_data->getUpper(leafKey >> 27); auto &lower = *upper.getChild((leafKey >> 12) & 32767u); @@ -962,13 +1021,13 @@ inline void CudaPointsToGrid::processBBox() cudaCheckError(); // reset bbox in upper nodes - cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { d_data->getUpper(tid).mBBox = CoordBBox(); }, mDeviceData); cudaCheckError(); // propagate bbox from lower -> upper/parent node - cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { const uint64_t lowerKey = d_data->d_lower_keys[tid]; auto &upper = d_data->getUpper(lowerKey >> 15); auto &lower = d_data->getLower(tid); @@ -978,13 +1037,13 @@ inline void CudaPointsToGrid::processBBox() cudaCheckError() // propagate bbox from upper -> root/parent node - 
cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); }, mDeviceData); cudaCheckError(); // update the world-bbox in the root node - cudaLambdaKernel<<<1, 1>>>(1, [=] __device__(size_t, Data *d_data) { + cudaLambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data) { d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); }, mDeviceData); cudaCheckError(); @@ -992,45 +1051,45 @@ inline void CudaPointsToGrid::processBBox() //================================================================================================ -template +template GridHandle// Grid with PointType coordinates as blind data -cudaPointsToGrid(const Vec3T* d_xyz, int pointCount, double voxelSize, PointType type, BufferT &buffer) +cudaPointsToGrid(const PtrT d_xyz, int pointCount, double voxelSize, PointType type, BufferT &buffer, cudaStream_t stream) { - CudaPointsToGrid converter(voxelSize); + CudaPointsToGrid converter(voxelSize, Vec3d(0.0), stream); converter.setPointType(type); return converter.getHandle(d_xyz, pointCount, buffer); } //================================================================================================ -template +template GridHandle// Grid -cudaVoxelsToGrid(const Vec3T* d_ijk, int pointCount, double voxelSize, const BufferT &buffer) +cudaVoxelsToGrid(const PtrT d_ijk, size_t voxelCount, double voxelSize, const BufferT &buffer, cudaStream_t stream) { - CudaPointsToGrid converter(voxelSize); - return converter.getHandle(d_ijk, pointCount, buffer); + CudaPointsToGrid converter(voxelSize, Vec3d(0.0), stream); + return converter.getHandle(d_ijk, voxelCount, buffer); } //================================================================================================ -template +template GridHandle -cudaPointsToGrid(std::vector> vec, const BufferT 
&buffer) +cudaPointsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) { std::vector> handles; - for (auto &p : vec) handles.push_back(cudaPointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer)); - return mergeDeviceGrids(handles); + for (auto &p : vec) handles.push_back(cudaPointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); } //================================================================================================ -template +template GridHandle -cudaVoxelsToGrid(std::vector> vec, const BufferT &buffer) +cudaVoxelsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) { std::vector> handles; - for (auto &p : vec) handles.push_back(cudaVoxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer)); - return mergeDeviceGrids(handles); + for (auto &p : vec) handles.push_back(cudaVoxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); } }// nanovdb namespace diff --git a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh index 0b38d13486..2f4bf203d6 100644 --- a/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaSignedFloodFill.cuh @@ -24,8 +24,9 @@ #include #include -#include +#include #include +#include namespace nanovdb { @@ -33,9 +34,10 @@ namespace nanovdb { /// @tparam BuildT Build type of the grid to be flood-filled /// @param d_grid Non-const device pointer to the grid that will be flood-filled /// @param verbose If true timing information will be printed to the terminal +/// @param stream optional cuda stream template typename enable_if::is_float, void>::type -cudaSignedFloodFill(NanoGrid *d_grid, bool verbose = false); +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose = false, cudaStream_t stream = 0); namespace {// anonymous namespace @@ -43,7 
+45,8 @@ template class CudaSignedFloodFill { public: - CudaSignedFloodFill() {} + CudaSignedFloodFill(bool verbose = false, cudaStream_t stream = 0) + : mStream(stream), mVerbose(verbose) {} /// @brief Toggle on and off verbose mode /// @param on if true verbose is turned on @@ -52,8 +55,9 @@ public: void operator()(NanoGrid *d_grid); private: - GpuTimer mTimer; - bool mVerbose{false}; + cudaStream_t mStream{0}; + GpuTimer mTimer; + bool mVerbose{false}; };// CudaSignedFloodFill @@ -149,30 +153,30 @@ void CudaSignedFloodFill::operator()(NanoGrid *d_grid) static_assert(BuildTraits::is_float, "CudaSignedFloodFill only works on float grids"); NANOVDB_ASSERT(d_grid); uint64_t count[4], *d_count = nullptr; - cudaCheck(cudaMalloc((void**)&d_count, 4*sizeof(uint64_t))); - cudaCpyNodeCount<<<1,1>>>(d_grid, d_count); + cudaCheck(cudaMallocAsync((void**)&d_count, 4*sizeof(uint64_t), mStream)); + cudaCpyNodeCount<<<1, 1, 0, mStream>>>(d_grid, d_count); cudaCheckError(); - cudaCheck(cudaMemcpy(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaFree(d_count)); + cudaCheck(cudaMemcpyAsync(&count, d_count, 4*sizeof(uint64_t), cudaMemcpyDeviceToHost, mStream)); + cudaCheck(cudaFreeAsync(d_count, mStream)); static const int threadsPerBlock = 128; auto blocksPerGrid = [&](size_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; auto *tree = reinterpret_cast*>(d_grid + 1); if (mVerbose) mTimer.start("\nProcess leaf nodes"); - cudaProcessLeafNodes<<>>(tree, count[0]<<9); + cudaProcessLeafNodes<<>>(tree, count[0]<<9); cudaCheckError(); if (mVerbose) mTimer.restart("Process lower internal nodes"); - cudaProcessInternalNodes<<>>(tree, count[1]<<12); + cudaProcessInternalNodes<<>>(tree, count[1]<<12); cudaCheckError(); if (mVerbose) mTimer.restart("Process upper internal nodes"); - cudaProcessInternalNodes<<>>(tree, count[2]<<15); + cudaProcessInternalNodes<<>>(tree, count[2]<<15); cudaCheckError(); //if (mVerbose) 
mTimer.restart("Process root node"); - //cudaProcessRootNode<<<1, 1>>>(tree); + //cudaProcessRootNode<<<1, 1, 0, mStream>>>(tree); if (mVerbose) mTimer.stop(); cudaCheckError(); }// CudaSignedFloodFill::operator() @@ -181,11 +185,15 @@ void CudaSignedFloodFill::operator()(NanoGrid *d_grid) template typename enable_if::is_float, void>::type -cudaSignedFloodFill(NanoGrid *d_grid, bool verbose) +cudaSignedFloodFill(NanoGrid *d_grid, bool verbose, cudaStream_t stream) { - CudaSignedFloodFill tmp; - tmp.setVerbose(verbose); - tmp(d_grid); + CudaSignedFloodFill sff(verbose, stream); + sff(d_grid); + auto *d_gridData = d_grid->data(); + GridChecksum cs = cudaGetGridChecksum(d_gridData, stream); + if (cs.mode() == ChecksumMode::Full) {// ChecksumMode::Partial checksum is unaffected + cudaGridChecksum(d_gridData, ChecksumMode::Full, stream); + } } }// nanovdb namespace diff --git a/nanovdb/nanovdb/util/cuda/GpuTimer.cuh b/nanovdb/nanovdb/util/cuda/GpuTimer.h similarity index 61% rename from nanovdb/nanovdb/util/cuda/GpuTimer.cuh rename to nanovdb/nanovdb/util/cuda/GpuTimer.h index 416ca403da..6c6e217403 100644 --- a/nanovdb/nanovdb/util/cuda/GpuTimer.cuh +++ b/nanovdb/nanovdb/util/cuda/GpuTimer.h @@ -1,14 +1,14 @@ // Copyright Contributors to the OpenVDB Project // SPDX-License-Identifier: MPL-2.0 -/// @file GpuTimer.cuh +/// @file GpuTimer.h /// /// @author Ken Museth /// /// @brief A simple GPU timing class -#ifndef NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED -#define NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED +#ifndef NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED #include // for std::cerr #include @@ -18,28 +18,31 @@ namespace nanovdb { class GpuTimer { + cudaStream_t mStream{0}; cudaEvent_t mStart, mStop; public: /// @brief Default constructor + /// @param stream CUDA stream to be timed (defaults to stream 0) /// @note Starts the timer - GpuTimer(void* stream = nullptr) + GpuTimer(cudaStream_t stream = 0) : mStream(stream) { 
cudaEventCreate(&mStart); cudaEventCreate(&mStop); - cudaEventRecord(mStart, reinterpret_cast(stream)); + cudaEventRecord(mStart, mStream); } /// @brief Construct and start the timer /// @param msg string message to be printed when timer is started /// @param stream CUDA stream to be timed (defaults to stream 0) /// @param os output stream for the message above - GpuTimer(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + GpuTimer(const std::string &msg, cudaStream_t stream = 0, std::ostream& os = std::cerr) + : mStream(stream) { os << msg << " ... " << std::flush; cudaEventCreate(&mStart); cudaEventCreate(&mStop); - cudaEventRecord(mStart, reinterpret_cast(stream)); + cudaEventRecord(mStart, mStream); } /// @brief Destructor @@ -52,37 +55,32 @@ class GpuTimer /// @brief Start the timer /// @param stream CUDA stream to be timed (defaults to stream 0) /// @param os output stream for the message above - void start(void* stream = nullptr) - { - cudaEventRecord(mStart, reinterpret_cast(stream)); - } + void start() {cudaEventRecord(mStart, mStream);} /// @brief Start the timer /// @param msg string message to be printed when timer is started - /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above - void start(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + void start(const std::string &msg, std::ostream& os = std::cerr) { os << msg << " ... " << std::flush; - this->start(stream); + this->start(); } /// @brief Start the timer /// @param msg string message to be printed when timer is started - /// @param stream CUDA stream to be timed (defaults to stream 0) /// @param os output stream for the message above - void start(const char* msg, void* stream = nullptr, std::ostream& os = std::cerr) + void start(const char* msg, std::ostream& os = std::cerr) { os << msg << " ... 
" << std::flush; - this->start(stream); + this->start(); } /// @brief elapsed time (since start) in miliseconds - /// @param stream CUDA stream to be timed (defaults to stream 0) /// @return elapsed time (since start) in miliseconds - float elapsed(void* stream = nullptr) + float elapsed() { - cudaEventRecord(mStop, reinterpret_cast(stream)); + cudaEventRecord(mStop, mStream); cudaEventSynchronize(mStop); float diff = 0.0f; cudaEventElapsedTime(&diff, mStart, mStop); @@ -90,25 +88,23 @@ class GpuTimer } /// @brief stop the timer - /// @param stream CUDA stream to be timed (defaults to stream 0) /// @param os output stream for the message above - void stop(void* stream = nullptr, std::ostream& os = std::cerr) + void stop(std::ostream& os = std::cerr) { - float diff = this->elapsed(stream); + float diff = this->elapsed(); os << "completed in " << diff << " milliseconds" << std::endl; } /// @brief stop and start the timer /// @param msg string message to be printed when timer is started - /// @param os output stream for the message above /// @warning Remember to call start before restart - void restart(const std::string &msg, void* stream = nullptr, std::ostream& os = std::cerr) + void restart(const std::string &msg, std::ostream& os = std::cerr) { this->stop(); - this->start(msg, stream, os); + this->start(msg, os); } };// GpuTimer } // namespace nanovdb -#endif // NANOVDB_GPU_TIMER_CUH_HAS_BEEN_INCLUDED +#endif // NANOVDB_GPU_TIMER_H_HAS_BEEN_INCLUDED diff --git a/pendingchanges/nanovdb.txt b/pendingchanges/nanovdb.txt index 37c31452c1..4e24792dd9 100644 --- a/pendingchanges/nanovdb.txt +++ b/pendingchanges/nanovdb.txt @@ -1,7 +1,7 @@ NanoVDB: - Minor version changed from 4 to 5 (major version is unchanged since the ABI is preserved) + Minor version changed from 4 to 6 (major version is unchanged since the ABI is preserved) Transition from C++11 to C++17 in NanoVDB.h and its tools Several new ways to construct and modify NanoVDB grids on the GPU New device function 
to convert points into a compact grid: nanovdb::cudaPointsToGrid @@ -13,9 +13,13 @@ NanoVDB: CreateNanoGrid.h is replacing GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h Moved CudaDeviceBuffer.h to cuda/CudaDeviceBuffer.h Added cuda/CudaUtils.h and cuda/GpuTimer.h with cuda utility functions - Added cuda/CudaPointToGrids.h that constructs device grids from points or voxels - Added cuda/CudaIndexGridToGrid.h that converts IndexGrids and values into regular Grids - Added cuda/CudaSignedFloodFill.h that performs signed-flood filing on SDF on the GPU + Added cuda/CudaPointToGrids.cuh that constructs device grids from points or voxels + Added cuda/CudaIndexToGrid.cuh that converts IndexGrids and values into regular Grids + Added cuda/CudaSignedFloodFill.cuh that performs signed-flood filing on SDF on the GPU + Added cuda/CudaAddBlindData.cuh that adds bind data to an existing grid on the GPU + Added cuda/CudaGridChecksum.cuh that computes CRC32 checksums of grids on the GPU + Added cuda/CudaGridHandle.cuh that handles grids on the GPU + Added cuda/CudaNodeManager.cuh that constructs a NodeManager on the GPU The move constructor in now requires the GridBuffer to actually contain a valid grid Added types: Ve4f Ve4d, ValueIndex, ValueOnIndex, ValueIndexMask, ValueOnIndexMask Major improvements to GridBuilder.h, which allows user to construct grids with random access on the host From fb908588d507d2e546fb5d0c52f2425e925ef929 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 11 Oct 2023 19:25:19 -0700 Subject: [PATCH 27/49] added dummy implementation of cudaMallocAsync and cudaFreeAsync Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaUtils.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h index e007e91e73..89c35b971e 100644 --- a/nanovdb/nanovdb/util/cuda/CudaUtils.h +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -53,6 +53,27 @@ 
cudaCheck(cudaGetLastError()); \ } +#if CUDART_VERSION < 11020 // 11.2 introduced cudaMallocAsync and cudaFreeAsync + +/// @brief ummy implementation of cudaMallocAsync that calls cudaMalloc +/// @param devPtr device pointer to allocated device memory +/// @param size byte size to be allocated +/// @param dummy stream argument +/// @return uda error code +inline cudaError_t cudaMallocAsync(void** devPtr, size_t size, cudaStream_t) +{ + return cudaMalloc(devPtr, size); // un-managed memory on the device, always 32B aligned! +} +/// @brief Dummy implementation of cudaFreeAsync that calls cudaFree +/// @param devPtr device pointer that will be freed +/// @param dummy stream argument +/// @return cuda error code +inline ​cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t) +{ + return cudaFree(devPtr); +} +#endif + #if defined(__CUDACC__)// the following functions only run on the GPU! // --- Wrapper for launching lambda kernels From a56439b4d883664bd8ae61a189142061907f913d Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 11 Oct 2023 19:54:29 -0700 Subject: [PATCH 28/49] fixed typo in previous commit Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaUtils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h index 89c35b971e..4475b50f3b 100644 --- a/nanovdb/nanovdb/util/cuda/CudaUtils.h +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -64,14 +64,16 @@ inline cudaError_t cudaMallocAsync(void** devPtr, size_t size, cudaStream_t) { return cudaMalloc(devPtr, size); // un-managed memory on the device, always 32B aligned! 
} + /// @brief Dummy implementation of cudaFreeAsync that calls cudaFree /// @param devPtr device pointer that will be freed /// @param dummy stream argument /// @return cuda error code -inline ​cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t) +inline cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t) { return cudaFree(devPtr); } + #endif #if defined(__CUDACC__)// the following functions only run on the GPU! From 90cccbadb110ecbdee6446b892c8b724e6d1de7a Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 12 Oct 2023 10:39:59 -0700 Subject: [PATCH 29/49] fixed CMakeLists.txt Signed-off-by: Ken Museth --- nanovdb/nanovdb/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index b4aa3f1dd2..499b93f0ed 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -173,12 +173,14 @@ set(NANOVDB_INCLUDE_UTILFILES util/CSampleFromVoxels.h util/cuda/CudaAddBlindData.cuh util/cuda/CudaDeviceBuffer.h + util/cuda/CudaGridChecksum.cuh util/cuda/CudaGridHandle.cuh util/cuda/CudaIndexToGrid.cuh + util/cuda/CudaNodeManager.cuh util/cuda/CudaPointsToGrid.cuh util/cuda/CudaSignedFloodFill.cuh util/cuda/CudaUtils.h - util/cuda/GpuTimer.cuh + util/cuda/GpuTimer.h util/DitherLUT.h util/ForEach.h util/GridBuilder.h From 99d0ea86bc49609e920a11d8df09afa2a9850720 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 12 Oct 2023 10:55:32 -0700 Subject: [PATCH 30/49] cleanup Signed-off-by: Ken Museth --- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 2 +- nanovdb/nanovdb/util/cuda/CudaUtils.h | 26 ++++++++++--------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 090f33ca9f..9a91b149ee 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -525,7 +525,7 @@ TEST_F(TestNanoVDB, CRC32) EXPECT_EQ("414fa339", ss.str()); } 
{ - EXPECT_EQ(~uint64_t(0), nanovdb::GridChecksum::EMPTY); + //EXPECT_EQ(~uint64_t(0), nanovdb::GridChecksum::EMPTY); nanovdb::GridChecksum cs(~uint64_t(0)); EXPECT_EQ(nanovdb::ChecksumMode::Disable, cs.mode()); EXPECT_TRUE(cs.isEmpty()); diff --git a/nanovdb/nanovdb/util/cuda/CudaUtils.h b/nanovdb/nanovdb/util/cuda/CudaUtils.h index 4475b50f3b..40001748ee 100644 --- a/nanovdb/nanovdb/util/cuda/CudaUtils.h +++ b/nanovdb/nanovdb/util/cuda/CudaUtils.h @@ -55,24 +55,18 @@ #if CUDART_VERSION < 11020 // 11.2 introduced cudaMallocAsync and cudaFreeAsync -/// @brief ummy implementation of cudaMallocAsync that calls cudaMalloc -/// @param devPtr device pointer to allocated device memory -/// @param size byte size to be allocated -/// @param dummy stream argument -/// @return uda error code -inline cudaError_t cudaMallocAsync(void** devPtr, size_t size, cudaStream_t) -{ - return cudaMalloc(devPtr, size); // un-managed memory on the device, always 32B aligned! -} +/// @brief Dummy implementation of cudaMallocAsync that calls cudaMalloc +/// @param d_ptr Device pointer to allocated device memory +/// @param size Number of bytes to allocate +/// @param dummy The stream establishing the stream ordering contract and the memory pool to allocate from (ignored) +/// @return Cuda error code +inline cudaError_t cudaMallocAsync(void** d_ptr, size_t size, cudaStream_t){return cudaMalloc(d_ptr, size);} /// @brief Dummy implementation of cudaFreeAsync that calls cudaFree -/// @param devPtr device pointer that will be freed -/// @param dummy stream argument -/// @return cuda error code -inline cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t) -{ - return cudaFree(devPtr); -} +/// @param d_ptr Device pointer that will be freed +/// @param dummy The stream establishing the stream ordering promise (ignored) +/// @return Cuda error code +inline cudaError_t cudaFreeAsync(void* d_ptr, cudaStream_t){return cudaFree(d_ptr);} #endif From b7d76dd8d70112f0eb46655a9f264c1143ecbb78 Mon Sep 17 
00:00:00 2001 From: Ken Museth Date: Thu, 12 Oct 2023 11:39:27 -0700 Subject: [PATCH 31/49] fixed Windows build issue in ex_nodemanager_cuda Signed-off-by: Ken Museth --- .../ex_nodemanager_cuda/nodemanager_cuda.cc | 8 +- .../nodemanager_cuda_kernel.cu | 6 +- nanovdb/nanovdb/util/GridChecksum.h | 77 +++++++++---------- 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc index 633eb5628e..68906b90e8 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda.cc @@ -10,9 +10,8 @@ extern "C" void launch_kernels(const nanovdb::NodeManager*, const nanovdb::NodeManager*, cudaStream_t stream); -extern "C" nanovdb::NodeManagerHandle cudaCreateNodeManager(const nanovdb::NanoGrid*); - -//openvdb::FloatGrid::Ptr createLevelSetSphere();// not sure why this is needed +extern "C" void cudaCreateNodeManager(const nanovdb::NanoGrid*, + nanovdb::NodeManagerHandle*); /// @brief This examples depends on OpenVDB, NanoVDB and CUDA. 
int main() @@ -41,7 +40,8 @@ int main() nodeHandle.deviceUpload(deviceGrid, stream, false); auto *deviceNodeMgr = nodeHandle.template deviceMgr(); #else// the approach below constructs a new NodeManager directly for a device grid - auto nodeHandle2 = cudaCreateNodeManager(deviceGrid); + nanovdb::NodeManagerHandle nodeHandle2; + cudaCreateNodeManager(deviceGrid, &nodeHandle2); auto *deviceNodeMgr = nodeHandle2.template deviceMgr(); #endif if (!deviceNodeMgr || !nodeMgr) { diff --git a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu index f2fb99ff3f..b06c87b4e5 100644 --- a/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu +++ b/nanovdb/nanovdb/examples/ex_nodemanager_cuda/nodemanager_cuda_kernel.cu @@ -30,6 +30,8 @@ extern "C" void launch_kernels(const nanovdb::NodeManager* deviceMgr, } // Simple wrapper that makes sure nanovdb::cudaCreateNodeManager is initiated -extern "C" auto cudaCreateNodeManager(const nanovdb::NanoGrid *d_grid) { - return nanovdb::cudaCreateNodeManager(d_grid); +extern "C" void cudaCreateNodeManager(const nanovdb::NanoGrid *d_grid, + nanovdb::NodeManagerHandle *handle) +{ + *handle = std::move(nanovdb::cudaCreateNodeManager(d_grid)); } \ No newline at end of file diff --git a/nanovdb/nanovdb/util/GridChecksum.h b/nanovdb/nanovdb/util/GridChecksum.h index 35697afd1c..531a6f674b 100644 --- a/nanovdb/nanovdb/util/GridChecksum.h +++ b/nanovdb/nanovdb/util/GridChecksum.h @@ -104,7 +104,7 @@ inline __hostdev__ uint32_t checksum(const void* data, size_t size, uint32_t crc } /// @brief Compute crc32 checksum of data between @c begin and @c end -/// @param begin points to beginning of @data +/// @param begin points to beginning of data /// @param end points to end of @data, (exclusive) /// @param crc initial value of crc32 checksum /// @return return crc32 checksum @@ -115,12 +115,12 @@ inline __hostdev__ uint32_t checksum(const void 
*begin, const void *end, uint32_ return checksum(begin, (const char*)end - (const char*)begin, crc); } -/// @brief -/// @param data -/// @param size -/// @param lut -/// @param crc -/// @return +/// @brief Compute crc32 checksum of @c data with @c size bytes using a lookup table +/// @param data pointer to begenning of data +/// @param size byte size +/// @param lut pointer to loopup table for accelerated crc32 computation +/// @param crc initial value of the checksum +/// @return crc32 checksum of @c data with @c size bytes inline __hostdev__ uint32_t checksum(const void *data, size_t size, const uint32_t lut[256], uint32_t crc = 0) { crc = ~crc; @@ -128,12 +128,12 @@ inline __hostdev__ uint32_t checksum(const void *data, size_t size, const uint32 return ~crc; } -/// @brief -/// @param begin -/// @param end -/// @param lut -/// @param crc -/// @return +/// @brief Compute crc32 checksum of data between @c begin and @c end using a lookup table +/// @param begin points to beginning of data +/// @param end points to end of @data, (exclusive) +/// @param lut pointer to loopup table for accelerated crc32 computation +/// @param crc initial value of crc32 checksum +/// @return return crc32 checksum inline __hostdev__ uint32_t checksum(const void *begin, const void *end, const uint32_t lut[256], uint32_t crc = 0) { NANOVDB_ASSERT(begin && end); @@ -174,60 +174,53 @@ class GridChecksum if (mode == ChecksumMode::Partial) mCRC[1] = EMPTY32; } - /// @brief - /// @return + /// @brief return the 64 bit checksum of this instance uint64_t checksum() const { return mChecksum; } - /// @brief - /// @param i - /// @return + /// @brief return 32 bit (crc32) checksum of this instance + /// @param i index of value 0 or 1 indicated the 32 bit checksum of the head or nodes + /// @return non-const reference of the i'th 32bit checksum uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } - /// @brief - /// @param i - /// @return + /// @brief return 32 bit (crc32) 
checksum of this instance + /// @param i index of value 0 or 1 indicated the 32 bit checksum of the head or nodes + /// @return copy of the i'th 32bit checksum uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC[i]; } - /// @brief - /// @return + /// @brief return true if the 64 bit checksum is partial, i.e. of head only bool isPartial() const { return mCRC[0] != EMPTY32 && mCRC[1] == EMPTY32; } - /// @brief - /// @return + /// @brief return true if the 64 bit checksum is fill, i.e. of both had and nodes bool isFull() const { return mCRC[0] != EMPTY32 && mCRC[1] != EMPTY32; } - /// @brief - /// @return + /// @brief return true if the 64 bit checksum is disables (unset) bool isEmpty() const { return mChecksum == EMPTY; } - /// @brief - /// @return + /// @brief return the mode of the 64 bit checksum ChecksumMode mode() const { return mChecksum == EMPTY ? ChecksumMode::Disable : mCRC[1] == EMPTY32 ? ChecksumMode::Partial : ChecksumMode::Full; } #ifdef NANOVDB_CRC32_LOG2_BLOCK_SIZE - /// @brief - /// @param gridData - /// @param mode + /// @brief compute checksum of @c gridData using a 4KB blocked approach + /// @param gridData Reference to GridData + /// @param mode Mode of the checksum computation ChecksumMode operator()(const GridData &gridData, ChecksumMode mode = ChecksumMode::Full); #else - /// @brief - /// @tparam ValueT - /// @param grid - /// @param mode + /// @brief Compute checksum using old (node-based) approach + /// @tparam ValueT Build type of the grid + /// @param grid Reference to Grid + /// @param mode Mode of the checksum computation template void operator()(const NanoGrid &grid, ChecksumMode mode = ChecksumMode::Full); #endif - /// @brief - /// @param rhs - /// @return + /// @brief return true if the checksums are identical + /// @param rhs other GridChecksum bool operator==(const GridChecksum &rhs) const {return mChecksum == rhs.mChecksum;} - /// @brief - /// @param rhs - /// @return + /// @brief return true if the checksums 
are not identical + /// @param rhs other GridChecksum bool operator!=(const GridChecksum &rhs) const {return mChecksum != rhs.mChecksum;} };// GridChecksum From 5b3681a803683d5139782fb81d494eda221dbd0e Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 12 Oct 2023 16:41:47 -0700 Subject: [PATCH 32/49] fixed compiler warning on OSX Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 4 ++-- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 4b099f2913..f36660c652 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -4811,7 +4811,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } -#ifdef __GNUC__ +#if defined(__GNUC__) && !defined(__APPLE__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" #endif @@ -4819,7 +4819,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } -#ifdef __GNUC__ +#if defined(__GNUC__) && !defined(__APPLE__) #pragma GCC diagnostic pop #endif diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 8da1dc3e1d..e2b6a5d677 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -4,6 +4,8 @@ /*! 
\file CudaPointsToGrid.cuh + \authors Greg Klar (initial version) and Ken Museth (final version) + \brief Generates NanoVDB grids from a list of voxels or points on the device \warning The header file contains cuda device code so be sure From 09dd3ea01f31076c780a5843cfd8f73e479cbdcd Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 13 Oct 2023 10:57:04 -0700 Subject: [PATCH 33/49] improved documentation of fancy_ptr Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index e2b6a5d677..80a20a662c 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -59,9 +59,21 @@ class fancy_ptr { const T* mPtr; public: + /// @brief Default constructor. + /// @note This method is atcually not required by CudaPointsToGrid + /// @param ptr Pointer to array of elements __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {} - __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];}// required by CudaPointsToGrid - __hostdev__ inline const T& operator*() const {return *mPtr;}// required by pointer_traits + /// @brief Index acces into the array pointed to by the stored pointer. + /// @note This method is required by CudaPointsToGrid! + /// @param i Unsigned index of the element to be returned + /// @return Const refernce to the element at the i'th poisiton + __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];} + /// @brief Dummy implementation required by pointer_traits. + /// @note Note that only the return type matters! + /// @details Unlike operator[] it is safe to assume that all pointer types have operator*, + /// which is why pointer_traits makes use of it to determine the element_type that + /// a pointer class is pointing to. E.g. 
operator[] is not always defined for std::shared_ptr! + __hostdev__ inline const T& operator*() const {return *mPtr;} };// fancy_ptr /// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr From 0eff43ed90cda12cde23f71c31000340d57398bc Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Fri, 13 Oct 2023 10:59:48 -0700 Subject: [PATCH 34/49] removed whitespace Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 80a20a662c..1326efde4f 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -72,7 +72,7 @@ public: /// @note Note that only the return type matters! /// @details Unlike operator[] it is safe to assume that all pointer types have operator*, /// which is why pointer_traits makes use of it to determine the element_type that - /// a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr! + /// a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr! 
__hostdev__ inline const T& operator*() const {return *mPtr;} };// fancy_ptr From e9b89e5369ecbf334cae46e33e2bd985c2fd88de Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Mon, 16 Oct 2023 12:48:53 -0700 Subject: [PATCH 35/49] added Vec3/4 methods to Rgba8 Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 192 +++++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 84 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index f36660c652..dce853d4d6 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -785,90 +785,6 @@ __hostdev__ inline static const DstT* PtrAdd(const SrcT* p, int64_t offset) return reinterpret_cast(reinterpret_cast(p) + offset); } -// --------------------------> Rgba8 <------------------------------------ - -/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int -class Rgba8 -{ - union - { - uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. - uint32_t packed; // 32 bit packed representation - } mData; - -public: - static const int SIZE = 4; - using ValueType = uint8_t; - - /// @brief Default copy constructor - Rgba8(const Rgba8&) = default; - - /// @brief Default move constructor - Rgba8(Rgba8&&) = default; - - /// @brief Default move assignment operator - /// @return non-const reference to this instance - Rgba8& operator=(Rgba8&&) = default; - - /// @brief Default copy assignment operator - /// @return non-const reference to this instance - Rgba8& operator=(const Rgba8&) = default; - - /// @brief Default ctor initializes all channels to zero - __hostdev__ Rgba8() - : mData{{0, 0, 0, 0}} - { - static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof"); - } - - /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque - /// @note all values should be in the range 0u to 255u - __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) - : mData{{r, g, b, a}} - { - } - - /// @brief @brief ctor 
where all channels are initialized to the same value - /// @note value should be in the range 0u to 255u - explicit __hostdev__ Rgba8(uint8_t v) - : mData{{v, v, v, v}} - { - } - - /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque - /// @note all values should be in the range 0.0f to 1.0f - __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) - : mData{{static_cast(0.5f + r * 255.0f), // round floats to nearest integers - static_cast(0.5f + g * 255.0f), // double {{}} is needed due to union - static_cast(0.5f + b * 255.0f), - static_cast(0.5f + a * 255.0f)}} - { - } - __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } - __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } - __hostdev__ float lengthSqr() const - { - return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] + - float(mData.c[1]) * mData.c[1] + - float(mData.c[2]) * mData.c[2]); //1/255^2 - } - __hostdev__ float length() const { return sqrtf(this->lengthSqr()); } - __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } - __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } - __hostdev__ const uint32_t& packed() const { return mData.packed; } - __hostdev__ uint32_t& packed() { return mData.packed; } - __hostdev__ const uint8_t& r() const { return mData.c[0]; } - __hostdev__ const uint8_t& g() const { return mData.c[1]; } - __hostdev__ const uint8_t& b() const { return mData.c[2]; } - __hostdev__ const uint8_t& a() const { return mData.c[3]; } - __hostdev__ uint8_t& r() { return mData.c[0]; } - __hostdev__ uint8_t& g() { return mData.c[1]; } - __hostdev__ uint8_t& b() { return mData.c[2]; } - __hostdev__ uint8_t& a() { return mData.c[3]; } -}; // Rgba8 - -using PackedRGBA8 = Rgba8; // for backwards compatibility - // --------------------------> isFloatingPoint(GridType) <------------------------------------ /// @brief return true if the GridType maps to 
a floating point type @@ -1880,6 +1796,114 @@ using Vec4d = Vec4; using Vec4f = Vec4; using Vec4i = Vec4; + +// --------------------------> Rgba8 <------------------------------------ + +/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int +class Rgba8 +{ + union + { + uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. + uint32_t packed; // 32 bit packed representation + } mData; + +public: + static const int SIZE = 4; + using ValueType = uint8_t; + + /// @brief Default copy constructor + Rgba8(const Rgba8&) = default; + + /// @brief Default move constructor + Rgba8(Rgba8&&) = default; + + /// @brief Default move assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(Rgba8&&) = default; + + /// @brief Default copy assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(const Rgba8&) = default; + + /// @brief Default ctor initializes all channels to zero + __hostdev__ Rgba8() + : mData{{0, 0, 0, 0}} + { + static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof"); + } + + /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0u to 255u + __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) + : mData{{r, g, b, a}} + { + } + + /// @brief @brief ctor where all channels are initialized to the same value + /// @note value should be in the range 0u to 255u + explicit __hostdev__ Rgba8(uint8_t v) + : mData{{v, v, v, v}} + { + } + + /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) + : mData{{static_cast(0.5f + r * 255.0f), // round floats to nearest integers + static_cast(0.5f + g * 255.0f), // double {{}} is needed due to union + static_cast(0.5f + b * 255.0f), + static_cast(0.5f + a * 255.0f)}} + { + } + + /// 
@brief Vec3f r,g,b ctor (alpha channel it set to 1) + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec3f& rgb) + : Rgba8(rgb[0], rgb[1], rgb[2]) + { + } + + /// @brief Vec4f r,g,b,a ctor + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec4f& rgba) + : Rgba8(rgba[0], rgba[1], rgba[2], rgba[3]) + { + } + + __hostdev__ bool operator< (const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } + __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } + __hostdev__ float lengthSqr() const + { + return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] + + float(mData.c[1]) * mData.c[1] + + float(mData.c[2]) * mData.c[2]); //1/255^2 + } + __hostdev__ float length() const { return sqrtf(this->lengthSqr()); } + /// @brief return n'th color channel as a float in the range 0 to 1 + __hostdev__ float asFloat(int n) const { return 0.003921569f*float(mData.c[n]); }// divide by 255 + __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } + __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } + __hostdev__ const uint32_t& packed() const { return mData.packed; } + __hostdev__ uint32_t& packed() { return mData.packed; } + __hostdev__ const uint8_t& r() const { return mData.c[0]; } + __hostdev__ const uint8_t& g() const { return mData.c[1]; } + __hostdev__ const uint8_t& b() const { return mData.c[2]; } + __hostdev__ const uint8_t& a() const { return mData.c[3]; } + __hostdev__ uint8_t& r() { return mData.c[0]; } + __hostdev__ uint8_t& g() { return mData.c[1]; } + __hostdev__ uint8_t& b() { return mData.c[2]; } + __hostdev__ uint8_t& a() { return mData.c[3]; } + __hostdev__ operator Vec3f() const { + return Vec3f(this->asFloat(0), this->asFloat(1), this->asFloat(2)); + } + __hostdev__ operator Vec4f() const { + return Vec4f(this->asFloat(0), this->asFloat(1), this->asFloat(2), this->asFloat(3)); + } +}; // Rgba8 + +using PackedRGBA8 
= Rgba8; // for backwards compatibility + // ----------------------------> TensorTraits <-------------------------------------- template::value || is_specialization::value || is_same::value) ? 1 : 0> From 6faffc39ed1189bff64be1a29bf50f9961d42684 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 18 Oct 2023 17:27:02 -0700 Subject: [PATCH 36/49] treading GridType::Half as active Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index dce853d4d6..6eac5a27b5 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -323,7 +323,7 @@ enum class GridType : uint32_t { Unknown = 0, // unknown value type - should ra Vec3f = 6, // single precision floating 3D vector Vec3d = 7, // double precision floating 3D vector Mask = 8, // no value, just the active state - Half = 9, // half precision floating point value + Half = 9, // half precision floating point value (placeholder for IEEE 754 Half) UInt32 = 10, // single precision unsigned integer value Boolean = 11, // boolean value, encoded in bit array RGBA8 = 12, // RGBA packed into 32bit word in reverse-order, i.e. R is lowest byte. 
@@ -792,6 +792,7 @@ __hostdev__ inline bool isFloatingPoint(GridType gridType) { return gridType == GridType::Float || gridType == GridType::Double || + gridType == GridType::Half || gridType == GridType::Fp4 || gridType == GridType::Fp8 || gridType == GridType::Fp16 || @@ -2011,6 +2012,8 @@ __hostdev__ inline GridType mapToGridType() return GridType::UInt32; } else if constexpr(is_same::value) { return GridType::Mask; + } else if constexpr(is_same::value) { + return GridType::Half; } else if constexpr(is_same::value) { return GridType::Index; } else if constexpr(is_same::value) { @@ -3284,6 +3287,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData case GridType::Int64: return mValueSize==8u; case GridType::Vec3f: return mValueSize==12u; case GridType::Vec3d: return mValueSize==24u; + case GridType::Half: return mValueSize==2u; case GridType::RGBA8: return mValueSize==4u; case GridType::Fp8: return mValueSize==1u; case GridType::Fp16: return mValueSize==2u; From ef1c3fe6b2d9c78184e4dadd11ae6dc210f80c72 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 18 Oct 2023 17:49:51 -0700 Subject: [PATCH 37/49] improved documentation Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 6eac5a27b5..fb6434cac1 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -812,7 +812,8 @@ __hostdev__ inline bool isFloatingPointVector(GridType gridType) // --------------------------> isInteger(GridType) <------------------------------------ -/// @brief return true if the GridType maps to a index type. +/// @brief Return true if the GridType maps to a POD integer type. 
+/// @details These types are used to associate a voxel with a POD integer type __hostdev__ inline bool isInteger(GridType gridType) { return gridType == GridType::Int16 || @@ -823,13 +824,14 @@ __hostdev__ inline bool isInteger(GridType gridType) // --------------------------> isIndex(GridType) <------------------------------------ -/// @brief return true if the GridType maps to a index type. +/// @brief Return true if the GridType maps to a special index type (not a POD integer type). +/// @details These types are used to index from a voxel into an external array of values, e.g. sidecar or blind data. __hostdev__ inline bool isIndex(GridType gridType) { - return gridType == GridType::Index || - gridType == GridType::OnIndex || - gridType == GridType::IndexMask || - gridType == GridType::OnIndexMask; + return gridType == GridType::Index ||// index both active and inactive values + gridType == GridType::OnIndex ||// index active values only + gridType == GridType::IndexMask ||// as Index, but with an additionl mask + gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask } // --------------------------> isValue(GridType, GridClass) <------------------------------------ From e0f345ec4adff75f1a9e69aeccc031eda89575ed Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 18 Oct 2023 18:03:13 -0700 Subject: [PATCH 38/49] updated pendingchanges/nanovdb.txt Signed-off-by: Ken Museth --- pendingchanges/nanovdb.txt | 53 +++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/pendingchanges/nanovdb.txt b/pendingchanges/nanovdb.txt index 4e24792dd9..58ff4a23cf 100644 --- a/pendingchanges/nanovdb.txt +++ b/pendingchanges/nanovdb.txt @@ -1,29 +1,30 @@ NanoVDB: - Minor version changed from 4 to 6 (major version is unchanged since the ABI is preserved) - Transition from C++11 to C++17 in NanoVDB.h and its tools - Several new ways to construct and modify NanoVDB grids on the GPU - New device function to convert 
points into a compact grid: nanovdb::cudaPointsToGrid - Improved and renamed device function that converts voxels into a grid: nanovdb::cudaVoxelsToGrid - Introduced a new extendable API for acceleration of custom random-access methods, e.g. getValue(ijk) - Index grids in 4 flavors (Index, OnIndex, IndexMask, and OnIndexMask) - Introduced new (dummy) build-type nanovdb::Points and nanovdb::GridType::PointIndex - Introduced new types nanovdb::GridType::Vec3u16 and nanovdb::GridType::Vec3u8 used for compressed representations of point coordinates as blind data - CreateNanoGrid.h is replacing GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h - Moved CudaDeviceBuffer.h to cuda/CudaDeviceBuffer.h - Added cuda/CudaUtils.h and cuda/GpuTimer.h with cuda utility functions - Added cuda/CudaPointToGrids.cuh that constructs device grids from points or voxels - Added cuda/CudaIndexToGrid.cuh that converts IndexGrids and values into regular Grids - Added cuda/CudaSignedFloodFill.cuh that performs signed-flood filing on SDF on the GPU - Added cuda/CudaAddBlindData.cuh that adds bind data to an existing grid on the GPU - Added cuda/CudaGridChecksum.cuh that computes CRC32 checksums of grids on the GPU - Added cuda/CudaGridHandle.cuh that handles grids on the GPU - Added cuda/CudaNodeManager.cuh that constructs a NodeManager on the GPU - The move constructor in now requires the GridBuffer to actually contain a valid grid - Added types: Ve4f Ve4d, ValueIndex, ValueOnIndex, ValueIndexMask, ValueOnIndexMask - Major improvements to GridBuilder.h, which allows user to construct grids with random access on the host - Numerous improvements in NanoVDB.h: e.g. 
Customizable get/set methods on ValueAccessor, BitFlags, transform(Map), expandAtomic(BBox), expandAtomic(Coord), intersectAtomic(BBox), pi(), BuildTraits, more documentation, Mask:: DenseIterator, Mask:: setOnAtomic,Mask:: setOffAtomic, Map constructors, DataType are now public vs private in all node types, GridMetaData can now be copied - PNanoVDB.h is now in sync with NanoVDB.h - Added PrefixSum.h for concurrent computation of prefix sum on the host - Primitives.h can now create grids on the CPU with SDF, FOG and point of torus + - Minor version changed from 4 to 6 (major version is unchanged since the ABI is preserved) + - Transition from C++11 to C++17 in NanoVDB.h and its tools + - New (backwards compatible ) file format that allows serialized grids to with streamed directly to file without headers + - Several new ways to construct and modify NanoVDB grids on the GPU + - New device function to convert points into a compact grid: nanovdb::cudaPointsToGrid + - Improved and renamed device function that converts voxels into a grid: nanovdb::cudaVoxelsToGrid + - Introduced a new extendable API for acceleration of custom random-access methods, e.g. 
getValue(ijk) + - Index grids in 4 flavors (Index, OnIndex, IndexMask, and OnIndexMask) + - Introduced new (dummy) build-type nanovdb::Points and nanovdb::GridType::PointIndex + - Introduced new types nanovdb::GridType::Vec3u16 and nanovdb::GridType::Vec3u8 used for compressed representations of point coordinates as blind data + - CreateNanoGrid.h is replacing GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h + - Moved CudaDeviceBuffer.h to cuda/CudaDeviceBuffer.h + - Added cuda/CudaUtils.h and cuda/GpuTimer.h with cuda utility functions + - Added cuda/CudaPointToGrids.cuh that constructs device grids from points or voxels + - Added cuda/CudaIndexToGrid.cuh that converts IndexGrids and values into regular Grids + - Added cuda/CudaSignedFloodFill.cuh that performs signed-flood filing on SDF on the GPU + - Added cuda/CudaAddBlindData.cuh that adds bind data to an existing grid on the GPU + - Added cuda/CudaGridChecksum.cuh that computes CRC32 checksums of grids on the GPU + - Added cuda/CudaGridHandle.cuh that handles grids on the GPU + - Added cuda/CudaNodeManager.cuh that constructs a NodeManager on the GPU + - The move constructor in GridHandle now requires the GridBuffer to actually contain a valid grid + - Added new types: Ve4f, Ve4d, ValueIndex, ValueOnIndex, ValueIndexMask, and ValueOnIndexMask + - Major improvements to GridBuilder.h, which allows user to construct grids with random access on the host + - Numerous improvements in NanoVDB.h: e.g. 
Customizable get/set methods on ValueAccessor, BitFlags, transform(Map), expandAtomic(BBox), expandAtomic(Coord), intersectAtomic(BBox), pi(), BuildTraits, more documentation, Mask:: DenseIterator, Mask:: setOnAtomic,Mask:: setOffAtomic, Map constructors, DataType are now public vs private in all node types, GridMetaData can now be copied + - PNanoVDB.h is now in sync with NanoVDB.h + - Added PrefixSum.h for concurrent computation of prefix sum on the host + - Primitives.h can now create grids on the CPU with SDF, FOG and point of torus \ No newline at end of file From cba0e7c78da3e17fc81fbd3361200ac01c5d2391 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Sun, 22 Oct 2023 09:59:48 -0700 Subject: [PATCH 39/49] added CudaGridStats.cuh and fixed an issue in CudaPointsToGrids Signed-off-by: Ken Museth --- nanovdb/nanovdb/CMakeLists.txt | 1 + nanovdb/nanovdb/NanoVDB.h | 187 +++++++++---- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 68 ++--- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 253 ++++++++++++++---- nanovdb/nanovdb/util/GridStats.h | 32 +-- nanovdb/nanovdb/util/GridValidator.h | 4 +- nanovdb/nanovdb/util/NodeManager.h | 18 +- nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh | 2 - nanovdb/nanovdb/util/cuda/CudaGridStats.cuh | 234 ++++++++++++++++ nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh | 4 +- .../nanovdb/util/cuda/CudaPointsToGrid.cuh | 48 +++- pendingchanges/nanovdb.txt | 3 +- 12 files changed, 668 insertions(+), 186 deletions(-) create mode 100644 nanovdb/nanovdb/util/cuda/CudaGridStats.cuh diff --git a/nanovdb/nanovdb/CMakeLists.txt b/nanovdb/nanovdb/CMakeLists.txt index 499b93f0ed..1c1f2ebb88 100644 --- a/nanovdb/nanovdb/CMakeLists.txt +++ b/nanovdb/nanovdb/CMakeLists.txt @@ -175,6 +175,7 @@ set(NANOVDB_INCLUDE_UTILFILES util/cuda/CudaDeviceBuffer.h util/cuda/CudaGridChecksum.cuh util/cuda/CudaGridHandle.cuh + util/cuda/CudaGridStats.cuh util/cuda/CudaIndexToGrid.cuh util/cuda/CudaNodeManager.cuh util/cuda/CudaPointsToGrid.cuh diff --git 
a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index fb6434cac1..17aef14684 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -391,8 +391,8 @@ enum class GridFlags : uint32_t { HasAverage = 1 << 3, // nodes contain averages of active values HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory - IsLexicographic = 1 << 6, // nodes are occasionally arranged lexicographically in memory - End = 1 << 7, // use End - 1 as a mask for the 5 lower bit flags + //IsLexicographic = 1 << 6, // nodes are occasionally arranged lexicographically in memory + End = 1 << 6, // use End - 1 as a mask for the 5 lower bit flags }; #ifndef __CUDACC_RTC__ @@ -405,7 +405,7 @@ inline const char* toStr(GridFlags gridFlags) "has average", "has standard deviation", "is breadth-first", - "is IsLexicographic", + //"is IsLexicographic", "end"}; static_assert(1 << (sizeof(LUT) / sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT"); return LUT[static_cast(gridFlags)]; @@ -1331,10 +1331,16 @@ class Coord __hostdev__ bool operator<(const Coord& rhs) const { return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false - : mVec[1] < rhs[1] ? true - : mVec[1] > rhs[1] ? false - : mVec[2] < rhs[2] ? true - : false; + : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false + : mVec[2] < rhs[2] ? true : false; + } + + /// @brief Return true if this Coord is lexicographically less or equal to the given Coord. + __hostdev__ bool operator<=(const Coord& rhs) const + { + return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false + : mVec[2] <=rhs[2] ? true : false; } // @brief Return true if the Coord components are identical. 
@@ -2280,7 +2286,10 @@ struct BBox : public BaseBBox __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || mCoord[0][1] >= mCoord[1][1] || mCoord[0][2] >= mCoord[1][2]; } - __hostdev__ Vec3T dim() const { return this->empty() ? Vec3T(0) : this->max() - this->min(); } + __hostdev__ operator bool() const { return mCoord[0][0] < mCoord[1][0] && + mCoord[0][1] < mCoord[1][1] && + mCoord[0][2] < mCoord[1][2]; } + __hostdev__ Vec3T dim() const { return *this ? this->max() - this->min() : Vec3T(0); } __hostdev__ bool isInside(const Vec3T& p) const { return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] && @@ -2369,11 +2378,15 @@ struct BBox : public BaseBBox __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && mCoord[0][1] < mCoord[1][1] && mCoord[0][2] < mCoord[1][2]; } - /// @brief Return true if this bounding box is empty, i.e. uninitialized + /// @brief Return true if this bounding box is empty, e.g. uninitialized __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || mCoord[0][1] > mCoord[1][1] || mCoord[0][2] > mCoord[1][2]; } - __hostdev__ CoordT dim() const { return this->empty() ? Coord(0) : this->max() - this->min() + Coord(1); } + /// @brief Convert this BBox to boolean true if it is not empty + __hostdev__ operator bool() const { return mCoord[0][0] <= mCoord[1][0] && + mCoord[0][1] <= mCoord[1][1] && + mCoord[0][2] <= mCoord[1][2]; } + __hostdev__ CoordT dim() const { return *this ? 
this->max() - this->min() + Coord(1) : Coord(0); } __hostdev__ uint64_t volume() const { auto d = this->dim(); @@ -3481,10 +3494,11 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData mData1 = 0u; // only used for index and point grids mData2 = 0u; } - // Set and unset various bit flags + /// @brief return true if the magic number and the version are both valid __hostdev__ bool isValid() const { return mMagic == NANOVDB_MAGIC_GRID || (mMagic == NANOVDB_MAGIC_NUMBER && mVersion.isCompatible()); } + // Set and unset various bit flags __hostdev__ void setMinMaxOn(bool on = true) { mFlags.setMask(GridFlags::HasMinMax, on); } __hostdev__ void setBBoxOn(bool on = true) { mFlags.setMask(GridFlags::HasBBox, on); } __hostdev__ void setLongGridNameOn(bool on = true) { mFlags.setMask(GridFlags::HasLongGridName, on); } @@ -3524,9 +3538,11 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData // @brief Return a non-const uint8_t pointer to the tree __hostdev__ uint8_t* treePtr() { return reinterpret_cast(this + 1); }// TreeData is always right after GridData + //__hostdev__ TreeData* treePtr() { return reinterpret_cast(this + 1); }// TreeData is always right after GridData // @brief Return a const uint8_t pointer to the tree __hostdev__ const uint8_t* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData + //__hostdev__ const TreeData* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData /// @brief Return a non-const uint8_t pointer to the firsr node at @c LEVEL /// @tparam LEVEL of the node. 
LEVEL 0 means leaf node and LEVEL 3 means root node @@ -3535,7 +3551,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ const uint8_t* nodePtr() const { static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); - auto *treeData = this->treePtr(); + auto *treeData = this->treePtr(); auto nodeOffset = *reinterpret_cast(treeData + 8*LEVEL);// skip LEVEL uint64_t return nodeOffset ? PtrAdd(treeData, nodeOffset) : nullptr; } @@ -3580,10 +3596,21 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData /// @brief return AABB of active values in index space __hostdev__ const CoordBBox& indexBBox() const {return *(const CoordBBox*)(this->nodePtr<3>());} + /// @brief return the root table has size + __hostdev__ uint32_t rootTableSize() const { + if (const uint8_t *root = this->nodePtr<3>()) { + return *(const uint32_t*)(root + sizeof(CoordBBox)); + } + return 0u; + } + /// @brief test if the grid is empty, e.i the root table has size 0 /// @return true if this grid contains not data whatsoever - __hostdev__ bool isEmpty() const {return *(const uint32_t*)(this->nodePtr<3>() + sizeof(CoordBBox)) == 0u;} + __hostdev__ bool isEmpty() const {return this->rootTableSize() == 0u;} + /// @brief return true if RootData follows TreeData in memory without any extra padding + /// @details TreeData is always following right after GridData, but the same might not be true for RootData + __hostdev__ bool isRootConnected() const { return *(const uint64_t*)((const char*)(this + 1) + 24) == 64u;} }; // GridData // Forward declaration of accelerated random access class @@ -3743,7 +3770,7 @@ class Grid : public GridData __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } - __hostdev__ bool 
isLexicographic() const { return DataType::mFlags.isMaskOn(GridFlags::IsLexicographic); } + //__hostdev__ bool isLexicographic() const { return DataType::mFlags.isMaskOn(GridFlags::IsLexicographic); } /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. @@ -3755,6 +3782,7 @@ class Grid : public GridData template __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + /// @brief return true if nodes at all levels can safely be accessed with simple linear offsets __hostdev__ bool isSequential() const { return UpperNodeType::FIXED_SIZE && LowerNodeType::FIXED_SIZE && LeafNodeType::FIXED_SIZE && this->isBreadthFirst(); } /// @brief Return a c-string with the name of this grid @@ -3872,6 +3900,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. 
in all nodes of the tree __hostdev__ const CoordBBox& bbox() const {return *PtrAdd(this, mNodeOffset[3]);} + + /// @brief return true if RootData is layout out immidiatly after TreeData in memory + __hostdev__ bool isRootNext() const {return mNodeOffset[3] == sizeof(TreeData); } };// TreeData // ----------------------------> GridTree <-------------------------------------- @@ -4281,6 +4312,7 @@ class RootNode : public RootData template class ChildIter : public BaseIter { + static_assert(is_same::type, RootNode>::value, "Invalid RootT"); using BaseT = BaseIter; using NodeT = typename match_const::type; @@ -4322,10 +4354,10 @@ class RootNode : public RootData } }; // Member class ChildIter - using ChildIterator = ChildIter; + using ChildIterator = ChildIter; using ConstChildIterator = ChildIter; - __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } template @@ -4841,7 +4873,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } -#if defined(__GNUC__) && !defined(__APPLE__) +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" #endif @@ -4849,7 +4881,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } -#if defined(__GNUC__) && !defined(__APPLE__) +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) #pragma GCC diagnostic pop #endif @@ -4887,29 +4919,32 @@ class InternalNode : public InternalData static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); 
// total voxel count represented by this node /// @brief Visits child nodes of this node only - class ChildIterator : public MaskIterT + template + class ChildIter : public MaskIterT { + static_assert(is_same::type, InternalNode>::value, "Invalid ParentT"); using BaseT = MaskIterT; - const DataType* mParent; + using NodeT = typename match_const::type; + ParentT* mParent; public: - __hostdev__ ChildIterator() + __hostdev__ ChildIter() : BaseT() , mParent(nullptr) { } - __hostdev__ ChildIterator(const InternalNode* parent) - : BaseT(parent->data()->mChildMask.beginOn()) - , mParent(parent->data()) + __hostdev__ ChildIter(ParentT* parent) + : BaseT(parent->mChildMask.beginOn()) + , mParent(parent) { } - ChildIterator& operator=(const ChildIterator&) = default; - __hostdev__ const ChildT& operator*() const + ChildIter& operator=(const ChildIter&) = default; + __hostdev__ NodeT& operator*() const { NANOVDB_ASSERT(*this); return *mParent->getChild(BaseT::pos()); } - __hostdev__ const ChildT* operator->() const + __hostdev__ NodeT* operator->() const { NANOVDB_ASSERT(*this); return mParent->getChild(BaseT::pos()); @@ -4919,9 +4954,14 @@ class InternalNode : public InternalData NANOVDB_ASSERT(*this); return (*this)->origin(); } - }; // Member class ChildIterator + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class ChildIter - __hostdev__ ChildIterator beginChild() const { return ChildIterator(this); } + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; + + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } /// @brief Visits all tile values in this node, i.e. 
both inactive and active tiles class ValueIterator : public MaskIterT @@ -4949,8 +4989,9 @@ class InternalNode : public InternalData __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); - return mParent->localToGlobalCoord(BaseT::pos()); + return mParent->offsetToGlobalCoord(BaseT::pos()); } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); @@ -4987,8 +5028,9 @@ class InternalNode : public InternalData __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); - return mParent->localToGlobalCoord(BaseT::pos()); + return mParent->offsetToGlobalCoord(BaseT::pos()); } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} }; // Member class ValueOnIterator __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } @@ -5031,8 +5073,9 @@ class InternalNode : public InternalData __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(mParent && bool(*this)); - return mParent->localToGlobalCoord(BaseT::pos()); + return mParent->offsetToGlobalCoord(BaseT::pos()); } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} }; // Member class DenseIterator __hostdev__ DenseIterator beginDense() const { return DenseIterator(this); } @@ -5346,6 +5389,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData } __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static bool hasStats() { return true; } + __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; } __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } __hostdev__ void setValue(uint32_t offset, const ValueType& value) @@ -5403,6 +5448,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase // no padding since it's always 32B aligned __hostdev__ static uint64_t memUsage() { return sizeof(LeafFnBase); } + __hostdev__ static bool hasStats() { return true; } + /// @brief Return 
padding of this class in bytes, due to aliasing and 32B alignment /// /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. @@ -5558,7 +5605,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData; using BuildType = FpN; - static constexpr bool FIXED_SIZE = false; + static constexpr bool FIXED_SIZE = false; __hostdev__ static constexpr uint32_t padding() { static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); @@ -5636,7 +5683,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) - 16u; } __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } - + __hostdev__ static bool hasStats() { return false; } __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); } __hostdev__ bool getMin() const { return false; } // dummy __hostdev__ bool getMax() const { return false; } // dummy @@ -5684,7 +5731,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) + 2 * 8u); @@ -5796,9 +5843,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData> 6; const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); - if (!(w & mask)) - return uint64_t(0); // if i'th value is inactive return offset to background value - uint64_t sum = BaseT::mOffset + CountOn(w & (mask - 1u)); + if (!(w & mask)) return uint64_t(0); // if i'th value is inactive return offset to background value + uint64_t sum = BaseT::mOffset + CountOn(w & (mask - 1u)); if (n--) sum += BaseT::mPrefixSum >> (9u * n) & 511u; return sum; } @@ -5990,7 +6036,7 @@ class LeafNode : public LeafData } }; // Member class ValueOffIterator - __hostdev__ ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } + __hostdev__ ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } __hostdev__ ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } /// @brief Visits all values in a leaf node, i.e. 
both active and inactive values @@ -6041,7 +6087,7 @@ class LeafNode : public LeafData } }; // Member class ValueIterator - __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } static_assert(is_same::Type>::value, "Mismatching BuildType"); @@ -6217,7 +6263,6 @@ class LeafNode : public LeafData private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned"); - //static_assert(offsetof(DataType, mValues) % 32 == 0, "LeafData::mValues is misaligned"); template friend class ReadAccessor; @@ -7408,21 +7453,23 @@ class GridMetaData mIndexBBox = grid.indexBBox(); mRootTableSize = grid.tree().root().getTableSize(); } - GridMetaData(const uint8_t* buffer) + GridMetaData(const GridData* gridData) { - auto* grid = reinterpret_cast*>(buffer); // dummy grid type - NANOVDB_ASSERT(grid && grid->isValid()); - mGridData = *grid->data(); - mTreeData = *grid->tree().data(); - mIndexBBox = grid->indexBBox(); - mRootTableSize = grid->tree().root().getTableSize(); + NANOVDB_ASSERT(gridData && gridData->isValid()); + mGridData = *gridData; + mTreeData = *reinterpret_cast(gridData->treePtr()); + mIndexBBox = gridData->indexBBox(); + mRootTableSize = gridData->rootTableSize(); } - __hostdev__ bool safeCast() const { return mTreeData.mNodeOffset[3] == sizeof(TreeData); } + /// @brief return true if the RootData follows right after the TreeData. + /// If so, this implies that it's safe to cast the grid from which + /// this instance was constructed to a GridMetaData + __hostdev__ bool safeCast() const { return mTreeData.isRootNext(); } + + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. 
template - __hostdev__ static bool safeCast(const NanoGrid& grid) - { // the RootData follows right after the TreeData - return grid.tree().data()->mNodeOffset[3] == sizeof(TreeData); - } + __hostdev__ static bool safeCast(const NanoGrid& grid){return grid.tree().isRootNext();} __hostdev__ bool isValid() const { return mGridData.isValid(); } __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } @@ -7440,7 +7487,7 @@ class GridMetaData __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } - __hostdev__ bool isLexicographic() const { return mGridData.mFlags.isMaskOn(GridFlags::IsLexicographic); } + //__hostdev__ bool isLexicographic() const { return mGridData.mFlags.isMaskOn(GridFlags::IsLexicographic); } __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } @@ -7977,7 +8024,7 @@ struct GetValue __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.value; } __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mTable[n].value; } __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mTable[n].value; } - __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.getValue(n); } // works with all build types + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.getValue(n); } // works with all build types }; // GetValue template @@ -7989,7 +8036,7 @@ struct SetValue __hostdev__ static auto set(typename NanoRoot::Tile& tile, const ValueT& v) { tile.value 
= v; } __hostdev__ static auto set(NanoUpper& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } __hostdev__ static auto set(NanoLower& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } - __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } }; // SetValue template @@ -8013,7 +8060,7 @@ struct GetState __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.state > 0; } __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mValueMask.isOn(n); } __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mValueMask.isOn(n); } - __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } }; // GetState /// @brief Implements Tree::getDim(Coord) @@ -8028,7 +8075,7 @@ struct GetDim __hostdev__ static uint32_t get(const NanoLeaf&, uint32_t) { return 1u; } }; // GetDim -/// @brief Implements Tree::probeLeaf(Coord) +/// @brief Return the pointer to the leaf node that contains Coord. Implements Tree::probeLeaf(Coord) /// @tparam BuildT Build type of the grid being called template struct GetLeaf @@ -8040,6 +8087,30 @@ struct GetLeaf __hostdev__ static const NanoLeaf* get(const NanoLeaf& leaf, uint32_t) { return &leaf; } }; // GetLeaf +/// @brief Return point to the lower internal node where Coord maps to one of its values, i.e. 
terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetLower +{ + __hostdev__ static const NanoLower* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLower* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoLower& node, uint32_t) { return &node; } + __hostdev__ static const NanoLower* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetLower + +/// @brief Return point to the upper internal node where Coord maps to one of its values, i.e. terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetUpper +{ + __hostdev__ static const NanoUpper* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoUpper& node, uint32_t) { return &node; } + __hostdev__ static const NanoUpper* get(const NanoLower& node, uint32_t) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetUpper + /// @brief Implements Tree::probeLeaf(Coord) /// @tparam BuildT Build type of the grid being called template diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 9a91b149ee..94e5f79372 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -1047,9 +1047,11 @@ TEST_F(TestNanoVDB, BBox) EXPECT_EQ(-std::numeric_limits::max(), bbox[1][1]); EXPECT_EQ(-std::numeric_limits::max(), bbox[1][2]); EXPECT_TRUE(bbox.empty()); + EXPECT_FALSE(bbox); bbox.expand(nanovdb::Vec3f(57.0f, -31.0f, 60.0f)); EXPECT_TRUE(bbox.empty()); + EXPECT_FALSE(bbox); EXPECT_EQ(nanovdb::Vec3f(0.0f), bbox.dim()); EXPECT_EQ(57.0f, bbox[0][0]); EXPECT_EQ(-31.0f, bbox[0][1]); @@ -1060,6 +1062,7 @@ 
TEST_F(TestNanoVDB, BBox) bbox.expand(nanovdb::Vec3f(58.0f, 0.0f, 62.0f)); EXPECT_FALSE(bbox.empty()); + EXPECT_TRUE(bbox); EXPECT_EQ(nanovdb::Vec3f(1.0f, 31.0f, 2.0f), bbox.dim()); EXPECT_EQ(57.0f, bbox[0][0]); EXPECT_EQ(-31.0f, bbox[0][1]); @@ -1080,9 +1083,11 @@ TEST_F(TestNanoVDB, CoordBBox) EXPECT_EQ(std::numeric_limits::min(), bbox[1][1]); EXPECT_EQ(std::numeric_limits::min(), bbox[1][2]); EXPECT_TRUE(bbox.empty()); + EXPECT_FALSE(bbox); bbox.expand(nanovdb::Coord(57, -31, 60)); EXPECT_FALSE(bbox.empty()); + EXPECT_TRUE(bbox); EXPECT_EQ(nanovdb::Coord(1), bbox.dim()); EXPECT_EQ(57, bbox[0][0]); EXPECT_EQ(-31, bbox[0][1]); @@ -1093,6 +1098,7 @@ TEST_F(TestNanoVDB, CoordBBox) bbox.expand(nanovdb::Coord(58, 0, 62)); EXPECT_FALSE(bbox.empty()); + EXPECT_TRUE(bbox); EXPECT_EQ(nanovdb::Coord(2, 32, 3), bbox.dim()); EXPECT_EQ(57, bbox[0][0]); EXPECT_EQ(-31, bbox[0][1]); @@ -1104,6 +1110,7 @@ TEST_F(TestNanoVDB, CoordBBox) { // test convert auto bbox2 = bbox.asReal(); EXPECT_FALSE(bbox2.empty()); + EXPECT_TRUE(bbox2); EXPECT_EQ(nanovdb::Vec3f(57.0f, -31.0f, 60.0f), bbox2.min()); EXPECT_EQ(nanovdb::Vec3f(59.0f, 1.0f, 63.0f), bbox2.max()); } @@ -3503,15 +3510,15 @@ TEST_F(TestNanoVDB, GridBuilder_Fp4) EXPECT_TRUE(nanovdb::isValid(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -3638,15 +3645,15 @@ 
TEST_F(TestNanoVDB, GridBuilder_Fp8) EXPECT_TRUE(nanovdb::isValid(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -3758,15 +3765,15 @@ TEST_F(TestNanoVDB, GridBuilder_Fp16) EXPECT_TRUE(nanovdb::isValid(nodeMgr)); EXPECT_TRUE(nodeMgr->isLinear()); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -3938,15 +3945,15 @@ TEST_F(TestNanoVDB, GridBuilder_FpN_Basic3) EXPECT_TRUE(nanovdb::isValid(nodeMgr)); EXPECT_FALSE(nodeMgr->isLinear()); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto 
it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -4051,15 +4058,15 @@ TEST_F(TestNanoVDB, NodeManager) EXPECT_EQ(&nodeMgr->upper(0), dstGrid->tree().getFirstNode< 2 >()); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -4119,15 +4126,15 @@ TEST_F(TestNanoVDB, NodeManager) EXPECT_EQ(2.0f, nodeMgr->leaf(1).getValue(x2)); uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -4160,7 +4167,7 @@ 
TEST_F(TestNanoVDB, NodeManager) } auto handle = nanovdb::createNanoGrid(srcGrid); EXPECT_TRUE(handle); - auto* dstGrid = handle.grid(); + const auto* dstGrid = handle.grid(); EXPECT_TRUE(dstGrid); EXPECT_TRUE(dstGrid->isBreadthFirst()); using GridT = std::remove_pointer::type; @@ -4178,15 +4185,15 @@ TEST_F(TestNanoVDB, NodeManager) } uint64_t n[3]={0}; - for (auto it2 = dstGrid->tree().root().beginChild(); it2; ++it2) { + for (auto it2 = dstGrid->tree().root().cbeginChild(); it2; ++it2) { auto *node2 = &nodeMgr->upper(n[0]++); EXPECT_TRUE(nanovdb::isValid(node2)); EXPECT_EQ(&*it2, node2); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { auto *node1 = &nodeMgr->lower(n[1]++); EXPECT_TRUE(nanovdb::isValid(node1)); EXPECT_EQ(&*it1, node1); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { auto *node0 = &nodeMgr->leaf(n[2]++); EXPECT_TRUE(nanovdb::isValid(node0)); EXPECT_EQ(&*it0, node0); @@ -7160,10 +7167,10 @@ TEST_F(TestNanoVDB, IndexGridBuilder2) }); auto fltAcc = fltTree.getAccessor();// NOT thread-safe! //mTimer.start("Dense IndexGrid: Sequential node iterator test of active voxels"); - for (auto it2 = idxRoot.beginChild(); it2; ++it2) { - for (auto it1 = it2->beginChild(); it1; ++it1) { - for (auto it0 = it1->beginChild(); it0; ++it0) { - for (auto vox = it0->beginValueOn(); vox; ++vox) { + for (auto it2 = idxRoot.cbeginChild(); it2; ++it2) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + for (auto vox = it0->cbeginValueOn(); vox; ++vox) { EXPECT_EQ(values[*vox], fltAcc.getValue(vox.getCoord())); }// loop over active voxels in the leaf node }// loop over child nodes of the lower internal nodes @@ -7313,10 +7320,10 @@ TEST_F(TestNanoVDB, SparseIndexGridBuilder2) }); auto fltAcc = fltTree.getAccessor();// NOT thread-safe! 
//mTimer.start("Sparse IndexGrid: Sequential node iterator test of active voxels"); - for (auto it2 = idxRoot.beginChild(); it2; ++it2) { - for (auto it1 = it2->beginChild(); it1; ++it1) { - for (auto it0 = it1->beginChild(); it0; ++it0) { - for (auto v = it0->beginValueOn(); v; ++v) { + for (auto it2 = idxRoot.cbeginChild(); it2; ++it2) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + for (auto v = it0->cbeginValueOn(); v; ++v) { EXPECT_EQ(values[*v], fltAcc.getValue(v.getCoord())); }// loop over active voxels in the leaf node }// loop over child nodes of the lower internal nodes @@ -7594,6 +7601,7 @@ TEST_F(TestNanoVDB, GridMetaData) auto handle = nanovdb::createLevelSetSphere(); auto *grid = handle.grid(); EXPECT_TRUE(grid); + EXPECT_TRUE(grid->isRootConnected()); nanovdb::GridMetaData meta(*grid);// deep copy EXPECT_EQ(672 + 64 + 24 + 8, sizeof(meta)); EXPECT_TRUE(nanovdb::GridMetaData::safeCast(*grid)); diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cu b/nanovdb/nanovdb/unittest/TestNanoVDB.cu index 9c2ffe7710..2df7f65318 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cu +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cu @@ -14,11 +14,13 @@ #include #include #include +#include #include #include #include #include +#include // for std::sort namespace nanovdb {// this namespace is required by gtest @@ -955,7 +957,7 @@ TEST(TestNanoVDBCUDA, CudaSignedFloodFill) EXPECT_EQ( 3.0f, acc(103,0,0)); EXPECT_EQ( 0.0f, acc(100,0,0)); EXPECT_EQ(-3.0f, acc( 97,0,0)); - EXPECT_FALSE(floatGrid->isLexicographic()); + //EXPECT_FALSE(floatGrid->isLexicographic()); EXPECT_TRUE(floatGrid->isBreadthFirst()); }// CudaSignedFloodFill @@ -1062,8 +1064,8 @@ TEST(TestNanoVDBCUDA, ThreePointsToGrid) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); EXPECT_EQ(1u, 
grid->blindDataCount()); const Vec3T *blindData = grid->getBlindData(0); EXPECT_TRUE(blindData); @@ -1176,8 +1178,8 @@ TEST(TestNanoVDBCUDA, EightVoxelsToFloatGrid) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //timer.start("Unit-testing grid on the CPU"); auto acc = grid->getAccessor(); @@ -1262,8 +1264,8 @@ TEST(TestNanoVDBCUDA, Random_CudaPointsToGrid_World64) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); //std::cerr << grid->indexBBox() << std::endl; @@ -1377,8 +1379,8 @@ TEST(TestNanoVDBCUDA, Large_CudaPointsToGrid_World64) EXPECT_TRUE(data); grid = handle.grid(); EXPECT_TRUE(grid); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); EXPECT_EQ(nanovdb::Vec3d(voxelSize), grid->voxelSize()); EXPECT_EQ(pointCount, grid->pointCount()); EXPECT_TRUE(nanovdb::CoordBBox::createCube(min, max-1).isInside(grid->indexBBox())); @@ -1503,8 +1505,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_World32) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("World32: Vec3 point coordinates in world space", grid->blindMetaData(0).mName); @@ -1630,8 +1632,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel32) grid = 
handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel32: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -1764,8 +1766,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel16) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel16: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -1893,8 +1895,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_Voxel8) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("Voxel8: Vec3 point coordinates in voxel space", grid->blindMetaData(0).mName); @@ -2022,8 +2024,8 @@ TEST(TestNanoVDBCUDA, Sphere_CudaPointsToGrid_PointID) grid = handle.grid(); EXPECT_TRUE(grid); EXPECT_EQ(pointGrid->voxelSize(), grid->voxelSize()); - EXPECT_TRUE(grid->isLexicographic()); - EXPECT_FALSE(grid->isBreadthFirst()); + //EXPECT_TRUE(grid->isLexicographic()); + EXPECT_TRUE(grid->isBreadthFirst()); //std::cerr << grid->indexBBox() << std::endl; EXPECT_STREQ("PointID: uint32_t indices to points", grid->blindMetaData(0).mName); @@ -2224,13 +2226,48 @@ TEST(TestNanoVDBCUDA, testGridHandleCopy) EXPECT_EQ( 0.0f, acc(100,0,0)); EXPECT_EQ(-3.0f, acc( 97,0,0)); } -} +}// testGridHandleCopy +// make -j testNanoVDB && 
./unittest/testNanoVDB --gtest_break_on_failure --gtest_filter="*compareNodeOrdering" TEST(TestNanoVDBCUDA, compareNodeOrdering) { using namespace nanovdb; - const int voxelCount = 4; - Coord coords[voxelCount]={Coord(0,0,0), Coord(256,0,0), Coord(0,0,8), Coord(0,2,4)}; +#if 0 + const int voxelCount = 2; + Coord coords[voxelCount]={Coord(-1,0,0), Coord(0,0,0)}; +#else + const int voxelCount = 5; + Coord coords[voxelCount]={Coord(0,0,0), Coord(256,0,0), Coord(0,0,8), Coord(0,-256,0), Coord(0,2,4)}; +#endif + + {// check coordToKey and keyToCoord used in CudaPointsToGrid + auto coordToKey = [](const nanovdb::Coord &ijk)->uint64_t{ + static constexpr int32_t offset = 1 << 30; + return (uint64_t(uint32_t(ijk[2] + offset) >> 12)) | // z is the lower 21 bits + (uint64_t(uint32_t(ijk[1] + offset) >> 12) << 21) | // y is the middle 21 bits + (uint64_t(uint32_t(ijk[0] + offset) >> 12) << 42); // x is the upper 21 bits + }; + auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ + static constexpr int32_t offset = 1 << 30; + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return nanovdb::Coord((((key >> 42) & MASK) << 12) - offset, // x are the upper 21 bits + (((key >> 21) & MASK) << 12) - offset, // y are the middle 21 bits + ((key & MASK) << 12) - offset); // z are the lower 21 bits + }; + using KeyT = std::pair; + KeyT keys[voxelCount]; + for (int i=0; i(); + EXPECT_TRUE(grid); + handle.deviceUpload(); + GridT *d_grid = handle.deviceGrid(); + EXPECT_TRUE(d_grid); + + {// check min/max using const iterators + float min = std::numeric_limits::max(), max = -min; + int n2=0, n1=0, n0=0;// check that nodes are arranged breath-first in memory + for (auto it2 = grid->tree().root().cbeginChild(); it2; ++it2) { + EXPECT_EQ(grid->tree().getFirstUpper() + n2++, &(*it2)); + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + EXPECT_EQ(grid->tree().getFirstLower() + n1++, &(*it1)); + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + 
EXPECT_EQ(grid->tree().getFirstLeaf() + n0++, &(*it0)); + for (auto it = it0->cbeginValueOn(); it; ++it) { + if (*it < min) min = *it; + if (*it > max) max = *it; + } + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + EXPECT_NE(min, grid->tree().root().minimum()); + EXPECT_NE(max, grid->tree().root().maximum()); + EXPECT_EQ(n2, grid->tree().nodeCount(2)); + EXPECT_EQ(n1, grid->tree().nodeCount(1)); + EXPECT_EQ(n0, grid->tree().nodeCount(0)); + } + { + //nanovdb::CpuTimer cpuTimer("CPU gridStats: Default = Full"); + nanovdb::gridStats(*grid); + //cpuTimer.stop(); + } + {// check min/max using const iterators + float min = std::numeric_limits::max(), max = -min; + int n2=0, n1=0, n0=0;// check that nodes are arranged breath-first in memory + for (auto it2 = grid->tree().root().cbeginChild(); it2; ++it2) { + EXPECT_EQ(grid->tree().getFirstUpper() + n2++, &(*it2)); + for (auto it1 = it2->cbeginChild(); it1; ++it1) { + EXPECT_EQ(grid->tree().getFirstLower() + n1++, &(*it1)); + for (auto it0 = it1->cbeginChild(); it0; ++it0) { + EXPECT_EQ(grid->tree().getFirstLeaf() + n0++, &(*it0)); + for (auto it = it0->cbeginValueOn(); it; ++it) { + if (*it < min) min = *it; + if (*it > max) max = *it; + } + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + EXPECT_EQ(min, grid->tree().root().minimum()); + EXPECT_EQ(max, grid->tree().root().maximum()); + EXPECT_EQ(n2, grid->tree().nodeCount(2)); + EXPECT_EQ(n1, grid->tree().nodeCount(1)); + EXPECT_EQ(n0, grid->tree().nodeCount(0)); + } + {// check min/max using non-const iterators + float min = std::numeric_limits::max(), max = -min; + int n2=0, n1=0, n0=0;// check that nodes are arranged breath-first in memory + for (auto it2 = grid->tree().root().beginChild(); it2; ++it2) { + EXPECT_EQ(grid->tree().getFirstUpper() + n2++, 
&(*it2)); + for (auto it1 = it2->beginChild(); it1; ++it1) { + EXPECT_EQ(grid->tree().getFirstLower() + n1++, &(*it1)); + for (auto it0 = it1->beginChild(); it0; ++it0) { + EXPECT_EQ(grid->tree().getFirstLeaf() + n0++, &(*it0)); + for (auto it = it0->beginValueOn(); it; ++it) { + if (*it < min) min = *it; + if (*it > max) max = *it; + } + }// loop over child nodes of the lower internal node + }// loop over child nodes of the upper internal node + }// loop over child nodes of the root node + EXPECT_EQ(min, grid->tree().root().minimum()); + EXPECT_EQ(max, grid->tree().root().maximum()); + EXPECT_EQ(n2, grid->tree().nodeCount(2)); + EXPECT_EQ(n1, grid->tree().nodeCount(1)); + EXPECT_EQ(n0, grid->tree().nodeCount(0)); + } + + { + //nanovdb::GpuTimer gpuTimer("GPU gridStats: Default = Full"); + nanovdb::cudaGridStats(d_grid); + //gpuTimer.stop(); + } + {// check bbox and stats of device grid + using DataT = nanovdb::NanoRoot::DataType; + std::unique_ptr buffer(new char[sizeof(DataT)]); + cudaMemcpy(buffer.get(), (char*)d_grid + sizeof(nanovdb::GridData) + sizeof(nanovdb::TreeData), sizeof(DataT), cudaMemcpyDeviceToHost); + auto *data = (const DataT*)buffer.get(); + EXPECT_EQ(grid->indexBBox(), data->mBBox); + EXPECT_EQ(grid->tree().root().background(), data->mBackground); + EXPECT_EQ(grid->tree().root().minimum(), data->mMinimum); + EXPECT_EQ(grid->tree().root().maximum(), data->mMaximum); + } +}// GridStats diff --git a/nanovdb/nanovdb/util/GridStats.h b/nanovdb/nanovdb/util/GridStats.h index 1c89cb98c1..374e674618 100644 --- a/nanovdb/nanovdb/util/GridStats.h +++ b/nanovdb/nanovdb/util/GridStats.h @@ -87,16 +87,12 @@ class Extrema } __hostdev__ Extrema& min(const ValueT& v) { - if (v < mMin) { - mMin = v; - } + if (v < mMin) mMin = v; return *this; } __hostdev__ Extrema& max(const ValueT& v) { - if (v > mMax) { - mMax = v; - } + if (v > mMax) mMax = v; return *this; } __hostdev__ Extrema& add(const ValueT& v) @@ -147,12 +143,8 @@ class Extrema } mMin, mMax; 
__hostdev__ Extrema& add(const Pair& p) { - if (p < mMin) { - mMin = p; - } - if (mMax < p) { - mMax = p; - } + if (p < mMin) mMin = p; + if (mMax < p) mMax = p; return *this; } @@ -181,29 +173,21 @@ class Extrema __hostdev__ Extrema& min(const VecT& v) { Pair tmp(v); - if (tmp < mMin) { - mMin = tmp; - } + if (tmp < mMin) mMin = tmp; return *this; } __hostdev__ Extrema& max(const VecT& v) { Pair tmp(v); - if (mMax < tmp) { - mMax = tmp; - } + if (mMax < tmp) mMax = tmp; return *this; } __hostdev__ Extrema& add(const VecT& v) { return this->add(Pair(v)); } __hostdev__ Extrema& add(const VecT& v, uint64_t) { return this->add(Pair(v)); } __hostdev__ Extrema& add(const Extrema& other) { - if (other.mMin < mMin) { - mMin = other.mMin; - } - if (mMax < other.mMax) { - mMax = other.mMax; - } + if (other.mMin < mMin) mMin = other.mMin; + if (mMax < other.mMax) mMax = other.mMax; return *this; } __hostdev__ const VecT& min() const { return mMin.vector; } diff --git a/nanovdb/nanovdb/util/GridValidator.h b/nanovdb/nanovdb/util/GridValidator.h index c14d03040f..fe6815bfb4 100644 --- a/nanovdb/nanovdb/util/GridValidator.h +++ b/nanovdb/nanovdb/util/GridValidator.h @@ -157,10 +157,10 @@ void GridValidator::checkNodes(const GridT &grid, std::string &errorStr) for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { auto &node2 = *it2; if (!check(&node2, sizeof(node2))) return; - for (auto it1 = node2.beginChild(); it1; ++it1) { + for (auto it1 = node2.cbeginChild(); it1; ++it1) { auto &node1 = *it1; if (!check(&node1, sizeof(node1))) return; - for (auto it0 = node1.beginChild(); it0; ++it0) { + for (auto it0 = node1.cbeginChild(); it0; ++it0) { auto &node0 = *it0; - if (!check(&node2, sizeof(node2))) return; + if (!check(&node0, sizeof(node0))) return;// validate the leaf node itself, not the upper node again }// loop over child nodes of the lower internal node diff --git a/nanovdb/nanovdb/util/NodeManager.h b/nanovdb/nanovdb/util/NodeManager.h index 821c220372..4da1eee873 100644 --- a/nanovdb/nanovdb/util/NodeManager.h +++ b/nanovdb/nanovdb/util/NodeManager.h @@ -219,22
+219,22 @@ class NodeManager : private NodeManagerData __hostdev__ uint64_t memUsage() const {return NodeManager::memUsage(this->grid());} /// @brief Return a reference to the grid - __hostdev__ GridT& grid() { return *reinterpret_cast(DataT::mGrid); } + __hostdev__ GridT& grid() { return *reinterpret_cast(DataT::mGrid); } __hostdev__ const GridT& grid() const { return *reinterpret_cast(DataT::mGrid); } /// @brief Return a reference to the tree - __hostdev__ TreeT& tree() { return this->grid().tree(); } + __hostdev__ TreeT& tree() { return this->grid().tree(); } __hostdev__ const TreeT& tree() const { return this->grid().tree(); } /// @brief Return a reference to the root - __hostdev__ RootT& root() { return this->tree().root(); } + __hostdev__ RootT& root() { return this->tree().root(); } __hostdev__ const RootT& root() const { return this->tree().root(); } /// @brief Return the number of tree nodes at the specified level /// @details 0 is leaf, 1 is lower internal, and 2 is upper internal level __hostdev__ uint64_t nodeCount(int level) const { return this->tree().nodeCount(level); } - __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } + __hostdev__ uint64_t leafCount() const { return this->tree().nodeCount(0); } __hostdev__ uint64_t lowerCount() const { return this->tree().nodeCount(1); } __hostdev__ uint64_t upperCount() const { return this->tree().nodeCount(2); } @@ -268,15 +268,15 @@ class NodeManager : private NodeManagerData /// @brief Return the i'th leaf node with respect to breadth-first ordering __hostdev__ const Node0& leaf(uint32_t i) const { return this->node<0>(i); } - __hostdev__ Node0& leaf(uint32_t i) { return this->node<0>(i); } + __hostdev__ Node0& leaf(uint32_t i) { return this->node<0>(i); } /// @brief Return the i'th lower internal node with respect to breadth-first ordering __hostdev__ const Node1& lower(uint32_t i) const { return this->node<1>(i); } - __hostdev__ Node1& lower(uint32_t i) { return this->node<1>(i); 
} + __hostdev__ Node1& lower(uint32_t i) { return this->node<1>(i); } /// @brief Return the i'th upper internal node with respect to breadth-first ordering __hostdev__ const Node2& upper(uint32_t i) const { return this->node<2>(i); } - __hostdev__ Node2& upper(uint32_t i) { return this->node<2>(i); } + __hostdev__ Node2& upper(uint32_t i) { return this->node<2>(i); } }; // NodeManager class @@ -306,9 +306,9 @@ NodeManagerHandle createNodeManager(const NanoGrid &grid, // Performs depth first traversal but breadth first insertion for (auto it2 = grid.tree().root().cbeginChild(); it2; ++it2) { *ptr2++ = PtrDiff(&*it2, &grid); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { *ptr1++ = PtrDiff(&*it1, &grid); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { *ptr0++ = PtrDiff(&*it0, &grid); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node diff --git a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh index 8d449d09b0..941392d00a 100644 --- a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh @@ -31,7 +31,6 @@ __global__ void cudaUpdateGridCount(GridData *data, uint32_t gridIndex, uint32_t data->mGridIndex = gridIndex; data->mGridCount = gridCount; if (data->mChecksum == GridChecksum::EMPTY) *d_dirty = false;// no need to update checksum if it didn't already exist - //data->mChecksum = GridChecksum::EMPTY;// disable the checksum (in the future this should call cudaGridChecksum) } } }// anonymous namespace @@ -86,7 +85,6 @@ cudaSplitGridHandles(const GridHandle &handle, const BufferT* other = n ptr += handle.gridSize(n); } cudaCheck(cudaFreeAsync(d_dirty, stream)); - //cudaCheck(cudaFreeAsync(d_lut, stream)); return std::move(handles); }// cudaSplitGridHandles diff --git a/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh 
b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh new file mode 100644 index 0000000000..20f03bb25c --- /dev/null +++ b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh @@ -0,0 +1,234 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file CudaGridStats.cuh + + \author Ken Museth + + \date October 9, 2023 + + \brief Re-computes min/max/avg/var/bbox information for each node in a + pre-existing NanoVDB grid on the device. + + \todo Currently this class only computes min/max/bbox - avg/var will be added shortly +*/ + +#ifndef NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED + +#include +#include + +namespace nanovdb { + +/// @brief Re-computes the min/max, stats and bbox information for an existing NanoVDB Grid +/// +/// @param d_grid Device pointer to the grid whose stats to update +/// @param mode Mode of computation for the statistics. +/// @param stream Optional cuda stream (defaults to zero) +template +void cudaGridStats(NanoGrid *d_grid, StatsMode mode = StatsMode::Default, cudaStream_t stream = 0); + +//================================================================================================ + +/// @brief Functor that re-computes the bbox and, depending on the statistics type, the min/max of every node of a NanoVDB grid on the device +template::ValueType>> +class CudaGridStats +{ + using GridT = NanoGrid; + using TreeT = typename GridT::TreeType; + using ValueT = typename TreeT::ValueType; + using Node0 = typename TreeT::Node0; // leaf + using Node1 = typename TreeT::Node1; // lower + using Node2 = typename TreeT::Node2; // upper + using RootT = typename TreeT::Node3; // root + static_assert(is_same::value, "Mismatching type"); + + ValueT mDelta; // skip rendering of node if: node.max < -mDelta || node.min > mDelta + +public: + CudaGridStats(ValueT delta = ValueT(0)) : mDelta(delta) {} + + void operator()(GridT *d_grid, cudaStream_t stream = 0); + +}; // CudaGridStats +
+//================================================================================================ + +namespace {// define cuda kernels in an unnamed namespace + +template +__global__ void processLeaf(NodeManager *d_nodeMgr)//, StatsT *d_stats) +{ + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->leafCount()) return; + auto &d_leaf = d_nodeMgr->leaf(tid); + + if (d_leaf.updateBBox()) {// updates active bounding box (also updates data->mFlags) and return true if non-empty + if constexpr( StatsT::hasMinMax() ) { // resolved at compile time + StatsT s; + for (auto it = d_leaf.cbeginValueOn(); it; ++it) s.add(*it); + d_leaf.setMin(s.min()); + d_leaf.setMax(s.max()); + } + } + d_leaf.mFlags &= ~uint8_t(1u);// enable rendering +}// processLeaf + +template +__global__ void processInternal(NodeManager *d_nodeMgr)//, StatsT *d_stats) +{ + using ChildT = typename NanoNode::type; + const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= d_nodeMgr->nodeCount(LEVEL)) return; + auto &d_node = d_nodeMgr->template node(tid); + auto &bbox = d_node.mBBox; + bbox = CoordBBox();// empty bbox + + StatsT s; + + for (auto it = d_node.cbeginChild(); it; ++it) { + const auto &child = *it; + bbox.expand( child.bbox() ); + if constexpr(StatsT::hasMinMax()){ + s.add(child.getMin()); + s.add(child.getMax()); + } + } + for (auto it = d_node.cbeginValueOn(); it; ++it) { + const Coord ijk = it.getCoord(); + bbox[0].minComponent(ijk); + bbox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if constexpr(StatsT::hasMinMax()) s.add(*it, ChildT::NUM_VALUES); + } + if constexpr(StatsT::hasMinMax()) { + d_node.setMin(s.min()); + d_node.setMax(s.max()); + } + d_node.mFlags &= ~uint64_t(1u);// enable rendering +}// processInternal + +template +__global__ void processRootAndGrid(NodeManager *d_nodeMgr) +{ + using ChildT = NanoUpper; + using ValueT = typename ChildT::ValueType; + + // process root + auto &root = d_nodeMgr->root(); + root.mBBox = 
CoordBBox(); + if (root.isEmpty()) { + root.mMinimum = root.mMaximum = root.mBackground; + root.mAverage = root.mStdDevi = 0; + } else { + ValueT v; + StatsT s; + for (auto it = root.beginDense(); it; ++it) { + if (auto *child = it.probeChild(v)) { + root.mBBox.expand( child->bbox() ); + if constexpr(StatsT::hasMinMax()){ + s.add(child->getMin()); + s.add(child->getMax()); + } + } else if (it.isValueOn()) { + const Coord ijk = it.getCoord(); + root.mBBox[0].minComponent(ijk); + root.mBBox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); + if constexpr(StatsT::hasMinMax()) s.add(v, ChildT::NUM_VALUES); + } + } + if constexpr(StatsT::hasMinMax()) { + root.mMinimum = s.min(); + root.mMaximum = s.max(); + } + } + + // process Grid + auto& grid = d_nodeMgr->grid(); + const auto& indexBBox = root.bbox(); + if (indexBBox.empty()) { + grid.mWorldBBox = BBox(); + grid.setBBoxOn(false); + } else { + // Note that below max is offset by one since CoordBBox.max is inclusive + // while bbox.max is exclusive. However, min is inclusive in both + // CoordBBox and BBox. This also guarantees that a grid with a single + // active voxel, does not have an empty world bbox! E.g. if a grid with a + // unit index-to-world transformation only contains the active voxel (0,0,0) + // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) + // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions + // of index and world bounding boxes inherited from OpenVDB! 
+ const Coord min = indexBBox[0]; + const Coord max = indexBBox[1] + Coord(1); + + auto& wBBox = grid.mWorldBBox; + const auto& map = grid.map(); + wBBox[0] = wBBox[1] = map.applyMap(Vec3d(min[0], min[1], min[2])); + wBBox.expand(map.applyMap(Vec3d(min[0], min[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(min[0], max[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], min[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], max[1], min[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], min[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(min[0], max[1], max[2]))); + wBBox.expand(map.applyMap(Vec3d(max[0], max[1], max[2]))); + grid.setBBoxOn(true); + } + + // set bit flags + grid.setMinMaxOn(StatsT::hasMinMax()); + grid.setAverageOn(StatsT::hasAverage()); + grid.setStdDeviationOn(StatsT::hasStdDeviation()); +}// processRootAndGrid + +}// cuda kernels are defined in an unnamed namespace + +//================================================================================================ + +template +void CudaGridStats::operator()(NanoGrid *d_grid, cudaStream_t stream) +{ + static const uint32_t threadsPerBlock = 128; + auto blocksPerGrid = [&](uint32_t count)->uint32_t{return (count + (threadsPerBlock - 1)) / threadsPerBlock;}; + + auto nodeMgrHandle = cudaCreateNodeManager(d_grid, CudaDeviceBuffer(), stream); + auto *d_nodeMgr = nodeMgrHandle.template deviceMgr(); + + uint32_t nodeCount[3];// {leaf, lower, upper} + cudaCheck(cudaMemcpyAsync(nodeCount, (char*)d_grid + sizeof(GridData) + 4*sizeof(uint64_t), 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream);// finish all device tasks in stream + + processLeaf<<>>(d_nodeMgr); + + processInternal<<>>(d_nodeMgr); + + processInternal<<>>(d_nodeMgr); + + processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr); + +} // CudaGridStats::operator()( Grid ) + +//================================================================================================ + +template +void 
cudaGridStats(NanoGrid *d_grid, StatsMode mode, cudaStream_t stream) +{ + if (d_grid == nullptr || mode == StatsMode::Disable) {// nothing to do without a grid or when stats are disabled + return; + } else if (mode == StatsMode::BBox || is_same::value) { + CudaGridStats > stats; + stats(d_grid, stream); + } else if (mode == StatsMode::MinMax) { + CudaGridStats > stats; + stats(d_grid, stream); + } else if (mode == StatsMode::All) { + CudaGridStats > stats; + stats(d_grid, stream); + } else { + throw std::runtime_error("cudaGridStats: Unsupported statistics mode."); + } +}// cudaGridStats + +} // namespace nanovdb + +#endif // NANOVDB_CUDAGRIDSTATS_CUH_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh index a69d86be20..3d35a4b902 100644 --- a/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaNodeManager.cuh @@ -72,9 +72,9 @@ cudaCreateNodeManager(const NanoGrid *d_grid, // Performs depth first traversal but breadth first insertion for (auto it2 = tree.root().cbeginChild(); it2; ++it2) { *ptr2++ = PtrDiff(&*it2, d_grid); - for (auto it1 = it2->beginChild(); it1; ++it1) { + for (auto it1 = it2->cbeginChild(); it1; ++it1) { *ptr1++ = PtrDiff(&*it1, d_grid); - for (auto it0 = it1->beginChild(); it0; ++it0) { + for (auto it0 = it1->cbeginChild(); it0; ++it0) { *ptr0++ = PtrDiff(&*it0, d_grid); }// loop over child nodes of the lower internal node }// loop over child nodes of the upper internal node diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 1326efde4f..30f6b5d378 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -264,7 +264,7 @@ public: , mPointType(is_same::value ?
PointType::Default : PointType::Disable) { mData.map = map; - mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsLexicographic}); + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); cudaCheck(cudaMallocAsync((void**)&mDeviceData, sizeof(Data), mStream)); } @@ -487,18 +487,30 @@ void CudaPointsToGrid::countNodes(const PtrT points, size_t poin if (mVerbose==2) mTimer.restart("Generate tile keys"); cudaLambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { + auto coordToKey = [](const Coord &ijk)->uint64_t{ + // int32_t has a range of -2^31 to 2^31 - 1 + // uint32_t has a range of 0 to 2^32 - 1 + static constexpr int64_t offset = int64_t(1) << 31;// widen before shifting since 1 << 31 overflows a 32-bit int + return (uint64_t(uint32_t(int64_t(ijk[2]) + offset) >> 12) ) | // z is the lower 21 bits + (uint64_t(uint32_t(int64_t(ijk[1]) + offset) >> 12) << 21) | // y is the middle 21 bits + (uint64_t(uint32_t(int64_t(ijk[0]) + offset) >> 12) << 42); // x is the upper 21 bits + }; d_indx[tid] = uint32_t(tid); uint64_t &key = d_keys[tid]; if constexpr(is_same::value) {// points are in world space if constexpr(is_same::value) { - key = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(points[tid]).round()); + key = coordToKey(d_data->map.applyInverseMapF(points[tid]).round()); + //key = NanoRoot::CoordToKey(d_data->map.applyInverseMapF(points[tid]).round()); } else {// points are Vec3d - key = NanoRoot::CoordToKey(d_data->map.applyInverseMap(points[tid]).round()); + //key = NanoRoot::CoordToKey(d_data->map.applyInverseMap(points[tid]).round()); + key = coordToKey(d_data->map.applyInverseMap(points[tid]).round()); } } else if constexpr(is_same::value) {// points Coord are in index space - key = NanoRoot::CoordToKey(points[tid]); + //key = NanoRoot::CoordToKey(points[tid]); + key = coordToKey(points[tid]); } else {// points are Vec3f or Vec3d in index space - key = NanoRoot::CoordToKey(points[tid].round()); + //key = NanoRoot::CoordToKey(points[tid].round()); + key =
coordToKey(points[tid].round()); } }, mDeviceData, points); cudaCheckError(); @@ -521,16 +533,15 @@ void CudaPointsToGrid::countNodes(const PtrT points, size_t poin cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); mMemPool.free(d_points_per_tile); - auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ - return tileID << 36 | // upper offset: 64-15-12-9=28, i.e. last 28 bits - uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits - uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits - uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits - }; - for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { const uint32_t count = points_per_tile[id]; cudaLambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ + return tileID << 36 | // upper offset: 64-15-12-9=28, i.e. last 28 bits + uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits + uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits + uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits + }; tid += offset; Vec3T p = points[d_indx[tid]]; if constexpr(is_same::value) p = is_same::value ? 
d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); @@ -662,7 +673,7 @@ inline void CudaPointsToGrid::processGridTreeRoot(const PtrT poi // process Grid auto &grid = d_data->getGrid(); - grid.init({GridFlags::HasBBox, GridFlags::IsLexicographic}, d_data->size, d_data->map, mapToGridType()); + grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, mapToGridType()); grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled grid.mBlindMetadataCount = is_same::value;// ? 1u : 0u; grid.mBlindMetadataOffset = d_data->meta; @@ -778,7 +789,18 @@ inline void CudaPointsToGrid::processUpperNodes() cudaLambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { auto &root = d_data->getRoot(); auto &upper = d_data->getUpper(tid); +#if 1 + auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ + static constexpr int64_t offset = int64_t(1) << 31;// 2^31 bias subtracted to recover signed coordinates; widen before shifting since 1 << 31 overflows a 32-bit int + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return nanovdb::Coord(int(int64_t(((key >> 42) & MASK) << 12) - offset), // x are the upper 21 bits + int(int64_t(((key >> 21) & MASK) << 12) - offset), // y are the middle 21 bits + int(int64_t(( key & MASK) << 12) - offset)); // z are the lower 21 bits + }; + const Coord ijk = keyToCoord(d_data->d_tile_keys[tid]); +#else const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); +#endif root.tile(tid)->setChild(ijk, &upper, &root); upper.mBBox[0] = ijk; upper.mFlags = 0; diff --git a/pendingchanges/nanovdb.txt b/pendingchanges/nanovdb.txt index 58ff4a23cf..239c3f5e93 100644 --- a/pendingchanges/nanovdb.txt +++ b/pendingchanges/nanovdb.txt @@ -14,13 +14,14 @@ NanoVDB: - CreateNanoGrid.h is replacing GridBuilder.h, IndexGridBuilder.h and OpenToNanoVDB.h - Moved CudaDeviceBuffer.h to cuda/CudaDeviceBuffer.h - Added cuda/CudaUtils.h and cuda/GpuTimer.h with cuda utility functions - - Added cuda/CudaPointToGrids.cuh that constructs device
grids from points or voxels + - Added cuda/CudaPointToGrids.cuh that constructs device grids from lists of points or voxels - Added cuda/CudaIndexToGrid.cuh that converts IndexGrids and values into regular Grids - Added cuda/CudaSignedFloodFill.cuh that performs signed-flood filing on SDF on the GPU - Added cuda/CudaAddBlindData.cuh that adds bind data to an existing grid on the GPU - Added cuda/CudaGridChecksum.cuh that computes CRC32 checksums of grids on the GPU - Added cuda/CudaGridHandle.cuh that handles grids on the GPU - Added cuda/CudaNodeManager.cuh that constructs a NodeManager on the GPU + - Added cuda/CudaGridStats.cuh that computes grid statistics on the GPU - The move constructor in GridHandle now requires the GridBuffer to actually contain a valid grid - Added new types: Ve4f, Ve4d, ValueIndex, ValueOnIndex, ValueIndexMask, and ValueOnIndexMask - Major improvements to GridBuilder.h, which allows user to construct grids with random access on the host From d15e5b2bea73227b23de501afafe8d1685917fde Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Sun, 22 Oct 2023 10:34:32 -0700 Subject: [PATCH 40/49] reverted nanovdb/Readme.md Signed-off-by: Ken Museth --- nanovdb/nanovdb/Readme.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/nanovdb/nanovdb/Readme.md b/nanovdb/nanovdb/Readme.md index f5d0fb5201..15fda0a2eb 100644 --- a/nanovdb/nanovdb/Readme.md +++ b/nanovdb/nanovdb/Readme.md @@ -3,11 +3,10 @@ # NanoVDB: A lightweight GPU friendly version of VDB initially targeting rendering applications. 
-* [Build instructions for make and cmake](docs/HowToBuild.md) -* [Frequently asked questions](docs/FAQ.md) -* [Grid cells vs grid nodes](docs/GridCells_vs_GridNodes/Main.pdf) -* [Source tree](docs/SourceTree.md) -* [Examples](docs/HelloWorld.md) +* [Build instructions for make and cmake](../../doc/nanovdb/HowToBuild.md) +* [Frequently asked questions](../../doc/nanovdb/FAQ.md) +* [Source tree](../../doc/nanovdb/SourceTree.md) +* [Examples](../../doc/nanovdb/HelloWorld.md) ### Copyright Contributors to the OpenVDB Project ### SPDX-License-Identifier: MPL-2.0 From e25ebd8121b4ca6373ac93f0bffd5d8273c6509b Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Sun, 22 Oct 2023 10:48:18 -0700 Subject: [PATCH 41/49] nanovdb/unittest/CMakeLists.txt Signed-off-by: Ken Museth --- nanovdb/nanovdb/unittest/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nanovdb/nanovdb/unittest/CMakeLists.txt b/nanovdb/nanovdb/unittest/CMakeLists.txt index 3c4358358f..b0a32be445 100644 --- a/nanovdb/nanovdb/unittest/CMakeLists.txt +++ b/nanovdb/nanovdb/unittest/CMakeLists.txt @@ -41,6 +41,14 @@ add_test(nanovdb_unit_test nanovdb_test_nanovdb) # ----------------------------------------------------------------------------- +if(NANOVDB_USE_CUDA) + add_executable(nanovdb_test_cuda "TestNanoVDB.cu") + target_link_libraries(nanovdb_test_cuda PRIVATE nanovdb GTest::GTest GTest::Main) + add_test(nanovdb_cuda_unit_test nanovdb_test_cuda) +endif() + +# ----------------------------------------------------------------------------- + if(NOT (NANOVDB_USE_TBB AND NANOVDB_USE_OPENVDB)) message(WARNING " - OpenVDB required to build OpenVDB unit tests. 
Skipping.") return() From b85092c3e3664b9cef5d59d1f9d2bef04fef4b78 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 24 Oct 2023 13:59:04 -0700 Subject: [PATCH 42/49] improved CudaGridStats and modified NanoVDB.h to support non-serial grids in the future Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 169 +++++++++++--------- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 12 +- nanovdb/nanovdb/unittest/TestNanoVDB.cu | 8 +- nanovdb/nanovdb/util/GridStats.h | 41 ++++- nanovdb/nanovdb/util/cuda/CudaGridStats.cuh | 82 ++++++---- 5 files changed, 188 insertions(+), 124 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 17aef14684..c4cf36944e 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -391,7 +391,6 @@ enum class GridFlags : uint32_t { HasAverage = 1 << 3, // nodes contain averages of active values HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory - //IsLexicographic = 1 << 6, // nodes are occasionally arranged lexicographically in memory End = 1 << 6, // use End - 1 as a mask for the 5 lower bit flags }; @@ -405,7 +404,6 @@ inline const char* toStr(GridFlags gridFlags) "has average", "has standard deviation", "is breadth-first", - //"is IsLexicographic", "end"}; static_assert(1 << (sizeof(LUT) / sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT"); return LUT[static_cast(gridFlags)]; @@ -556,12 +554,18 @@ struct is_pointer // --------------------------> remove_const <------------------------------------ +/// @brief Trait use to const from type. 
Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_const::type = float template struct remove_const { using type = T; }; +/// @brief Template specialization of trait class used to remove the const qualifier from a type +/// @tparam T Type of the const type +/// @details remove_const::type = float template struct remove_const { @@ -570,28 +574,50 @@ struct remove_const // --------------------------> remove_reference <------------------------------------ +/// @brief Trait used to remove reference, i.e. "&", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_reference::type = float template struct remove_reference {using type = T;}; +/// @brief Template specialization of trait class used to remove reference, i.e. "&", qualifier from a type +/// @tparam T Type of the reference +/// @details remove_reference::type = float template struct remove_reference {using type = T;}; // --------------------------> remove_pointer <------------------------------------ +/// @brief Trait used to remove pointer, i.e. "*", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float template struct remove_pointer {using type = T;}; +/// @brief Template specialization of trait class used to remove pointer, i.e. 
"*", qualifier from a type +/// @tparam T Type of the pointer +/// @details remove_pointer::type = float template struct remove_pointer {using type = T;}; // --------------------------> match_const <------------------------------------ +/// @brief Trait used to transfer the const-ness of a reference type to another type +/// @tparam T Type whose const-ness needs to match the reference type +/// @tparam ReferenceT Reference type that is not const +/// @details match_const::type = int +/// match_const::type = int template struct match_const { using type = typename remove_const::type; }; +/// @brief Template specialization used to transfer the const-ness of a reference type to another type +/// @tparam T Type that will adopt the const-ness of the reference type +/// @tparam ReferenceT Reference type that is const +/// @details match_const::type = const int +/// match_const::type = const int template struct match_const { @@ -834,6 +860,23 @@ __hostdev__ inline bool isIndex(GridType gridType) gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask } +// --------------------------> memcpy64 <------------------------------------ + +/// @brief copy 64 bit words from @c src to @c dst +/// @param dst pointer to destination +/// @param src pointer to source +/// @param word_count number of 64 bit words to be copied +/// @return destination pointer +/// @warning @c src and @c dst cannot overlap and should both be 64 bit aligned +__hostdev__ inline static void* memcpy64(void *dst, const void *src, size_t word_count) +{ + NANOVDB_ASSERT(uint64_t(dst) % 8 == 0 && uint64_t(src) % 8 == 0); + auto *d = reinterpret_cast(dst), *e = d + word_count; + auto *s = reinterpret_cast(src); + while (d != e) *d++ = *s++; + return dst; +} + // --------------------------> isValue(GridType, GridClass) <------------------------------------ /// @brief return true if the combination of GridType and GridClass is valid. 
@@ -2891,8 +2934,7 @@ class Mask __hostdev__ Mask& operator=(const Mask& other) { - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = other.mWords[i]; + memcpy64(mWords, other.mWords, WORD_COUNT); return *this; } @@ -3460,9 +3502,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ GridData& operator=(const GridData& other) { static_assert(8 * 84 == sizeof(GridData), "GridData has unexpected size"); - auto* src = reinterpret_cast(&other); - for (auto *dst = reinterpret_cast(this), *end = dst + 84; dst != end; ++dst) - *dst = *src++; + memcpy64(this, &other, 84); return *this; } __hostdev__ void init(std::initializer_list list = {GridFlags::IsBreadthFirst}, @@ -3770,7 +3810,6 @@ class Grid : public GridData __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } - //__hostdev__ bool isLexicographic() const { return DataType::mFlags.isMaskOn(GridFlags::IsLexicographic); } /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. @@ -3870,7 +3909,7 @@ __hostdev__ int Grid::findBlindData(const char* name) const struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData { // sizeof(TreeData) == 64B - uint64_t mNodeOffset[4]; //32B, byte offset from this tree to first leaf, lower, upper and root node + int64_t mNodeOffset[4];// 32B, byte offset from this tree to first leaf, lower, upper and root node. 
A zero offset means no node exists uint32_t mNodeCount[3]; // 12B, total number of nodes of type: leaf, lower internal, upper internal uint32_t mTileCount[3]; // 12B, total number of active tile values at the lower internal, upper internal and root node levels uint64_t mVoxelCount; // 8B, total number of active voxels in the root and all its child nodes. @@ -3878,31 +3917,23 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData __hostdev__ TreeData& operator=(const TreeData& other) { static_assert(8 * 8 == sizeof(TreeData), "TreeData has unexpected size"); - auto* src = reinterpret_cast(&other); - for (auto *dst = reinterpret_cast(this), *end = dst + 8; dst != end; ++dst) - *dst = *src++; + memcpy64(this, &other, 8); return *this; } - template - __hostdev__ void setRoot(const RootT* root) { mNodeOffset[3] = PtrDiff(root, this); } - template - __hostdev__ RootT* getRoot() { return PtrAdd(this, mNodeOffset[3]); } - template - __hostdev__ const RootT* getRoot() const { return PtrAdd(this, mNodeOffset[3]); } + __hostdev__ void setRoot(const void* root) {mNodeOffset[3] = root ? PtrDiff(root, this) : 0;} + __hostdev__ uint8_t* getRoot() { return mNodeOffset[3] ? PtrAdd(this, mNodeOffset[3]) : nullptr; } + __hostdev__ const uint8_t* getRoot() const { return mNodeOffset[3] ? PtrAdd(this, mNodeOffset[3]) : nullptr; } template - __hostdev__ void setFirstNode(const NodeT* node) - { - mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0; - } + __hostdev__ void setFirstNode(const NodeT* node) {mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0;} - __hostdev__ bool isEmpty() const {return *PtrAdd(this, mNodeOffset[3] + sizeof(BBox)) == 0;} + __hostdev__ bool isEmpty() const {return mNodeOffset[3] ? *PtrAdd(this, mNodeOffset[3] + sizeof(BBox)) == 0 : true;} - /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. 
in all nodes of the tree - __hostdev__ const CoordBBox& bbox() const {return *PtrAdd(this, mNodeOffset[3]);} + /// @brief Return the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? *PtrAdd(this, mNodeOffset[3]) : CoordBBox();} /// @brief return true if RootData is layout out immidiatly after TreeData in memory - __hostdev__ bool isRootNext() const {return mNodeOffset[3] == sizeof(TreeData); } + __hostdev__ bool isRootNext() const {return mNodeOffset[3] ? mNodeOffset[3] == sizeof(TreeData) : false; } };// TreeData // ----------------------------> GridTree <-------------------------------------- @@ -3962,9 +3993,19 @@ class Tree : public TreeData /// @brief return memory usage in bytes for the class __hostdev__ static uint64_t memUsage() { return sizeof(DataType); } - __hostdev__ RootT& root() { return *DataType::template getRoot(); } + __hostdev__ RootT& root() + { + RootT* ptr = reinterpret_cast(DataType::getRoot()); + NANOVDB_ASSERT(ptr); + return *ptr; + } - __hostdev__ const RootT& root() const { return *DataType::template getRoot(); } + __hostdev__ const RootT& root() const + { + const RootT* ptr = reinterpret_cast(DataType::getRoot()); + NANOVDB_ASSERT(ptr); + return *ptr; + } __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); } @@ -4028,8 +4069,8 @@ class Tree : public TreeData template __hostdev__ NodeT* getFirstNode() { - const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset > 0 ? PtrAdd(this, offset) : nullptr; + const int64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; + return offset ? PtrAdd(this, offset) : nullptr; } /// @brief return a const pointer to the first node of the specified type @@ -4038,8 +4079,8 @@ class Tree : public TreeData template __hostdev__ const NodeT* getFirstNode() const { - const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset > 0 ? 
PtrAdd(this, offset) : nullptr; + const int64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; + return offset ? PtrAdd(this, offset) : nullptr; } /// @brief return a pointer to the first node at the specified level @@ -7438,11 +7479,10 @@ ReadAccessor createAccessor(const NanoRoot @@ -7455,21 +7495,31 @@ class GridMetaData } GridMetaData(const GridData* gridData) { - NANOVDB_ASSERT(gridData && gridData->isValid()); - mGridData = *gridData; - mTreeData = *reinterpret_cast(gridData->treePtr()); - mIndexBBox = gridData->indexBBox(); - mRootTableSize = gridData->rootTableSize(); + static_assert(8 * 96 == sizeof(GridMetaData), "GridMetaData has unexpected size"); + if (GridMetaData::safeCast(gridData)) { + memcpy64(this, gridData, 96); + } else {// otherwise copy each member individually + mGridData = *gridData; + mTreeData = *reinterpret_cast(gridData->treePtr()); + mIndexBBox = gridData->indexBBox(); + mRootTableSize = gridData->rootTableSize(); + } } /// @brief return true if the RootData follows right after the TreeData. /// If so, this implies that it's safe to cast the grid from which /// this instance was constructed to a GridMetaData __hostdev__ bool safeCast() const { return mTreeData.isRootNext(); } + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. + __hostdev__ static bool safeCast(const GridData *gridData){ + NANOVDB_ASSERT(gridData && gridData->isValid()); + return gridData->isRootConnected(); + } /// @brief return true if it is safe to cast the grid to a pointer /// of type GridMetaData, i.e. construction can be avoided. 
template - __hostdev__ static bool safeCast(const NanoGrid& grid){return grid.tree().isRootNext();} + __hostdev__ static bool safeCast(const NanoGrid& grid){return grid.tree().isRootNext();} __hostdev__ bool isValid() const { return mGridData.isValid(); } __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } @@ -7487,7 +7537,6 @@ class GridMetaData __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } - //__hostdev__ bool isLexicographic() const { return mGridData.mFlags.isMaskOn(GridFlags::IsLexicographic); } __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } @@ -7497,7 +7546,6 @@ class GridMetaData __hostdev__ const BBox& indexBBox() const { return mIndexBBox; } __hostdev__ Vec3d voxelSize() const { return mGridData.mVoxelSize; } __hostdev__ int blindDataCount() const { return mGridData.mBlindMetadataCount; } - //__hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const {return *mGridData.blindMetaData(n);} __hostdev__ uint64_t activeVoxelCount() const { return mTreeData.mVoxelCount; } __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return mTreeData.mTileCount[level - 1]; } __hostdev__ uint32_t nodeCount(uint32_t level) const { return mTreeData.mNodeCount[level]; } @@ -7505,43 +7553,6 @@ class GridMetaData __hostdev__ uint32_t rootTableSize() const { return mRootTableSize; } __hostdev__ bool isEmpty() const { return mRootTableSize == 0; } __hostdev__ Version version() const { return mGridData.mVersion; } -#else - // We 
cast to a grid templated on a dummy ValueType which is safe because we are very - // careful only to call certain methods which are known to be invariant to the ValueType! - // In other words, don't use this technique unless you are intimately familiar with the - // memory-layout of the data structure and the reasons why certain methods are safe - // to call and others are not! - using GridT = NanoGrid; - __hostdev__ const GridT& grid() const { return *reinterpret_cast(this); } - -public: - __hostdev__ bool isValid() const { return this->grid().isValid(); } - __hostdev__ uint64_t gridSize() const { return this->grid().gridSize(); } - __hostdev__ uint32_t gridIndex() const { return this->grid().gridIndex(); } - __hostdev__ uint32_t gridCount() const { return this->grid().gridCount(); } - __hostdev__ const char* shortGridName() const { return this->grid().shortGridName(); } - __hostdev__ GridType gridType() const { return this->grid().gridType(); } - __hostdev__ GridClass gridClass() const { return this->grid().gridClass(); } - __hostdev__ bool isLevelSet() const { return this->grid().isLevelSet(); } - __hostdev__ bool isFogVolume() const { return this->grid().isFogVolume(); } - __hostdev__ bool isPointIndex() const { return this->grid().isPointIndex(); } - __hostdev__ bool isPointData() const { return this->grid().isPointData(); } - __hostdev__ bool isMask() const { return this->grid().isMask(); } - __hostdev__ bool isStaggered() const { return this->grid().isStaggered(); } - __hostdev__ bool isUnknown() const { return this->grid().isUnknown(); } - __hostdev__ const Map& map() const { return this->grid().map(); } - __hostdev__ const BBox& worldBBox() const { return this->grid().worldBBox(); } - __hostdev__ const BBox& indexBBox() const { return this->grid().indexBBox(); } - __hostdev__ Vec3d voxelSize() const { return this->grid().voxelSize(); } - __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); } - __hostdev__ const GridBlindMetaData& 
blindMetaData(uint32_t n) const { return this->grid().blindMetaData(n); } - __hostdev__ uint64_t activeVoxelCount() const { return this->grid().activeVoxelCount(); } - __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return this->grid().tree().activeTileCount(level); } - __hostdev__ uint32_t nodeCount(uint32_t level) const { return this->grid().tree().nodeCount(level); } - __hostdev__ uint64_t checksum() const { return this->grid().checksum(); } - __hostdev__ bool isEmpty() const { return this->grid().isEmpty(); } - __hostdev__ Version version() const { return this->grid().version(); } -#endif }; // GridMetaData /// @brief Class to access points at a specific voxel location diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 94e5f79372..8a244b7f98 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -335,10 +335,10 @@ TEST_F(TestNanoVDB, Version) TEST_F(TestNanoVDB, Basic) { - { // CHAR_BIT + { // verify size of CHAR_BIT EXPECT_EQ(8, CHAR_BIT); } - { + {// check that keys in a map are sorted in ascending order std::vector v = {3, 1, 7, 0}; EXPECT_FALSE(std::is_sorted(v.begin(), v.end())); std::map m; @@ -349,7 +349,7 @@ TEST_F(TestNanoVDB, Basic) v.push_back(i.first); EXPECT_TRUE(std::is_sorted(v.begin(), v.end())); } - { + {// check that size of enum is the size of an integer enum tmp { a = 0, b, c, @@ -360,6 +360,12 @@ TEST_F(TestNanoVDB, Basic) EXPECT_EQ(176u, sizeof(nanovdb::io::FileMetaData)); //std::cerr << "sizeof(FileMetaData) = " << sizeof(nanovdb::io::FileMetaData) << std::endl; } + {// check that it's safe to case uint64_t to int64_t (as long as its no larger than 2^63 - 1) + const uint64_t i = 9223372036854775807ULL;// = 2^63 - 1 + const int64_t *j = reinterpret_cast(&i); + EXPECT_EQ(i, *j); + //std::cerr << "i="< -__global__ void processLeaf(NodeManager *d_nodeMgr)//, StatsT *d_stats) +__global__ void processLeaf(NodeManager *d_nodeMgr, 
StatsT *d_stats) { const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= d_nodeMgr->leafCount()) return; auto &d_leaf = d_nodeMgr->leaf(tid); if (d_leaf.updateBBox()) {// updates active bounding box (also updates data->mFlags) and return true if non-empty - if constexpr( StatsT::hasMinMax() ) { // resolved at compile time - StatsT s; - for (auto it = d_leaf.cbeginValueOn(); it; ++it) s.add(*it); - d_leaf.setMin(s.min()); - d_leaf.setMax(s.max()); + if constexpr(StatsT::hasStats()) { + StatsT stats; + for (auto it = d_leaf.cbeginValueOn(); it; ++it) stats.add(*it); + if constexpr(StatsT::hasAverage()) { + d_stats[tid] = stats; + *reinterpret_cast(&d_leaf.mMinimum) = tid; + } else { + stats.setStats(d_leaf); + } } } d_leaf.mFlags &= ~uint8_t(1u);// enable rendering }// processLeaf template -__global__ void processInternal(NodeManager *d_nodeMgr)//, StatsT *d_stats) +__global__ void processInternal(NodeManager *d_nodeMgr, StatsT *d_stats) { using ChildT = typename NanoNode::type; const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -85,32 +87,39 @@ __global__ void processInternal(NodeManager *d_nodeMgr)//, StatsT *d_sta auto &d_node = d_nodeMgr->template node(tid); auto &bbox = d_node.mBBox; bbox = CoordBBox();// empty bbox + StatsT stats; + uint32_t childID = 0u; - StatsT s; - - for (auto it = d_node.cbeginChild(); it; ++it) { - const auto &child = *it; + for (auto it = d_node.beginChild(); it; ++it) { + auto &child = *it; bbox.expand( child.bbox() ); - if constexpr(StatsT::hasMinMax()){ - s.add(child.getMin()); - s.add(child.getMax()); + if constexpr(StatsT::hasAverage()) { + childID = *reinterpret_cast(&child.mMinimum); + StatsT &s = d_stats[childID]; + s.setStats(child); + stats.add(s); + } else if constexpr(StatsT::hasMinMax()) { + stats.add(child.minimum()); + stats.add(child.maximum()); } } for (auto it = d_node.cbeginValueOn(); it; ++it) { const Coord ijk = it.getCoord(); bbox[0].minComponent(ijk); bbox[1].maxComponent(ijk + 
Coord(ChildT::DIM - 1)); - if constexpr(StatsT::hasMinMax()) s.add(*it, ChildT::NUM_VALUES); + if constexpr(StatsT::hasStats()) stats.add(*it, ChildT::NUM_VALUES); } - if constexpr(StatsT::hasMinMax()) { - d_node.setMin(s.min()); - d_node.setMax(s.max()); + if constexpr(StatsT::hasAverage()) { + d_stats[childID] = stats; + *reinterpret_cast(&d_node.mMinimum) = childID; + } else if constexpr(StatsT::hasMinMax()) { + stats.setStats(d_node); } d_node.mFlags &= ~uint64_t(1u);// enable rendering }// processInternal template -__global__ void processRootAndGrid(NodeManager *d_nodeMgr) +__global__ void processRootAndGrid(NodeManager *d_nodeMgr, StatsT *d_stats) { using ChildT = NanoUpper; using ValueT = typename ChildT::ValueType; @@ -127,21 +136,22 @@ __global__ void processRootAndGrid(NodeManager *d_nodeMgr) for (auto it = root.beginDense(); it; ++it) { if (auto *child = it.probeChild(v)) { root.mBBox.expand( child->bbox() ); - if constexpr(StatsT::hasMinMax()){ - s.add(child->getMin()); - s.add(child->getMax()); + if constexpr(StatsT::hasAverage()) { + StatsT &stats = d_stats[*reinterpret_cast(&child->mMinimum)]; + stats.setStats(*child); + s.add(stats); + } else if constexpr(StatsT::hasMinMax()){ + s.add(child->minimum()); + s.add(child->maximum()); } } else if (it.isValueOn()) { const Coord ijk = it.getCoord(); root.mBBox[0].minComponent(ijk); root.mBBox[1].maxComponent(ijk + Coord(ChildT::DIM - 1)); - if constexpr(StatsT::hasMinMax()) s.add(v, ChildT::NUM_VALUES); + if constexpr(StatsT::hasStats()) s.add(v, ChildT::NUM_VALUES); } } - if constexpr(StatsT::hasMinMax()) { - root.mMinimum = s.min(); - root.mMaximum = s.max(); - } + s.setStats(root); } // process Grid @@ -198,13 +208,19 @@ void CudaGridStats::operator()(NanoGrid *d_grid, cudaStr cudaCheck(cudaMemcpyAsync(nodeCount, (char*)d_grid + sizeof(GridData) + 4*sizeof(uint64_t), 3*sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); cudaStreamSynchronize(stream);// finish all device tasks in stream - 
processLeaf<<>>(d_nodeMgr); + StatsT *d_stats = nullptr; + + if constexpr(StatsT::hasAverage()) cudaCheck(cudaMallocAsync((void**)&d_stats, nodeCount[0]*sizeof(StatsT), stream)); + + processLeaf<<>>(d_nodeMgr, d_stats); + + processInternal<<>>(d_nodeMgr, d_stats); - processInternal<<>>(d_nodeMgr); + processInternal<<>>(d_nodeMgr, d_stats); - processInternal<<>>(d_nodeMgr); + processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr, d_stats); - processRootAndGrid<<<1, 1, 0, stream>>>(d_nodeMgr); + if constexpr(StatsT::hasAverage()) cudaCheck(cudaFreeAsync(d_stats, stream)); } // CudaGridStats::operator()( Grid ) From 888fefd9831e2a6f7885ca2bb7e8849ce79fab27 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 24 Oct 2023 14:02:07 -0700 Subject: [PATCH 43/49] removed whitespace Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/cuda/CudaGridStats.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh index f89f595296..dcf5bfc850 100644 --- a/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridStats.cuh @@ -113,7 +113,7 @@ __global__ void processInternal(NodeManager *d_nodeMgr, StatsT *d_stats) d_stats[childID] = stats; *reinterpret_cast(&d_node.mMinimum) = childID; } else if constexpr(StatsT::hasMinMax()) { - stats.setStats(d_node); + stats.setStats(d_node); } d_node.mFlags &= ~uint64_t(1u);// enable rendering }// processInternal From 6dd554728fae16e2d3377dda53fc9c1edfcec439 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Tue, 24 Oct 2023 18:47:49 -0700 Subject: [PATCH 44/49] fixed issue in CreateNanoGrid.h Signed-off-by: Ken Museth --- nanovdb/nanovdb/util/CreateNanoGrid.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nanovdb/nanovdb/util/CreateNanoGrid.h b/nanovdb/nanovdb/util/CreateNanoGrid.h index 2d578222d0..7ad71c57d4 100644 --- a/nanovdb/nanovdb/util/CreateNanoGrid.h +++ 
b/nanovdb/nanovdb/util/CreateNanoGrid.h @@ -40,7 +40,7 @@ \brief Create a NanoVDB grid from scratch \code -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) using SrcGridT = openvdb::FloatGrid; #else using SrcGridT = nanovdb::build::FloatGrid; @@ -75,7 +75,7 @@ #ifndef NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED #define NANOVDB_CREATE_NANOGRID_H_HAS_BEEN_INCLUDED -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) #include #include #include @@ -108,7 +108,7 @@ template struct MapToNano; //================================================================================================ -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) /// @brief Forward declaration of free-standing function that converts an OpenVDB GridBase into a NanoVDB GridHandle /// @tparam BufferT Type of the buffer used to allocate the destination grid /// @param base Shared pointer to a base openvdb grid to be converted @@ -370,7 +370,7 @@ class NodeAccessor< NanoGrid > template struct MapToNano { using type = T; }; -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) template<> struct MapToNano {using type = nanovdb::ValueMask;}; @@ -739,7 +739,7 @@ class CreateNanoGrid typename enable_if::is_index, uint64_t>::type countValues(); -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) template typename disable_if::value || is_same::value, uint64_t>::type @@ -971,7 +971,7 @@ inline typename disable_if::value || BuildTraits::preProcess() { if (const uint64_t pointCount = this->countPoints()) { -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) if constexpr(is_same::value) { if (!mBlindMetaData.empty()) throw std::runtime_error("expected no blind meta data"); this->addBlindData("index", @@ -1762,7 +1762,7 @@ CreateNanoGrid::postProcess() if 
constexpr(is_same::value) mCodec.reset(); auto *dstGrid = this->template dstGrid(); gridStats(*dstGrid, mStats); -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) auto *metaData = this->dstMeta(0); if constexpr(is_same::value || is_same::value) { @@ -1905,7 +1905,7 @@ CreateNanoGrid::copyValues(SrcValueT *buffer) //================================================================================================ -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) template template @@ -2023,7 +2023,7 @@ createNanoGrid(const SrcGridT &srcGrid, //================================================================================================ -#if defined(NANOVDB_USE_OPENVDB) +#if defined(NANOVDB_USE_OPENVDB) && !defined(__CUDACC__) template GridHandle openToNanoVDB(const openvdb::GridBase::Ptr& base, From b9ab546118d00816ba9437d0679f9a5953be167d Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 25 Oct 2023 12:12:32 -0700 Subject: [PATCH 45/49] more cleanup and minor improvements Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 31 +++++++++++++------ nanovdb/nanovdb/util/GridStats.h | 16 ++-------- nanovdb/nanovdb/util/IO.h | 17 ++++++++++ .../nanovdb/util/cuda/CudaPointsToGrid.cuh | 4 +-- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index c4cf36944e..4d1bd07781 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -1520,7 +1520,7 @@ class Coord // ----------------------------> Vec3 <-------------------------------------- -/// @brief A simple vector class with three double components, similar to openvdb::math::Vec3 +/// @brief A simple vector class with three components, similar to openvdb::math::Vec3 template class Vec3 { @@ -1718,7 +1718,7 @@ __hostdev__ inline Vec3d Coord::asVec3d() const // ----------------------------> Vec4 <-------------------------------------- -/// @brief A 
simple vector class with three double components, similar to openvdb::math::Vec4 +/// @brief A simple vector class with four components, similar to openvdb::math::Vec4 template class Vec4 { @@ -2364,6 +2364,11 @@ struct BBox : public BaseBBox , mPos(b.min()) { } + __hostdev__ Iterator(const BBox& b, const Coord& p) + : mBBox(b) + , mPos(p) + { + } __hostdev__ Iterator& operator++() { if (mPos[2] < mBBox[1][2]) { // this is the most common case @@ -2384,11 +2389,17 @@ struct BBox : public BaseBBox ++(*this); return tmp; } + __hostdev__ bool operator==(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos == rhs.mPos; + } /// @brief Return @c true if the iterator still points to a valid coordinate. __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } __hostdev__ const CoordT& operator*() const { return mPos; } }; // Iterator __hostdev__ Iterator begin() const { return Iterator{*this}; } + __hostdev__ Iterator end() const { return Iterator{*this, this->max().offsetBy(1)}; } __hostdev__ BBox() : BaseT(CoordT::max(), CoordT::min()) { @@ -2449,7 +2460,7 @@ struct BBox : public BaseBBox } /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 ! - template + template __hostdev__ BBox> asReal() const { static_assert(is_floating_point::value, "CoordBBox::asReal: Expected a floating point coordinate"); @@ -3932,7 +3943,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData /// @brief Return the index bounding box of all the active values in this tree, i.e. in all nodes of the tree __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? *PtrAdd(this, mNodeOffset[3]) : CoordBBox();} - /// @brief return true if RootData is layout out immidiatly after TreeData in memory + /// @brief return true if RootData is layout out immediately after TreeData in memory __hostdev__ bool isRootNext() const {return mNodeOffset[3] ? 
mNodeOffset[3] == sizeof(TreeData) : false; } };// TreeData @@ -8160,12 +8171,14 @@ struct ProbeValue template struct GetNodeInfo { + using ValueType = typename NanoLeaf::ValueType; + using FloatType = typename NanoLeaf::FloatType; struct NodeInfo { - uint32_t level, dim; - typename NanoLeaf::ValueType minimum, maximum; - typename NanoLeaf::FloatType average, stdDevi; - CoordBBox bbox; + uint32_t level, dim; + ValueType minimum, maximum; + FloatType average, stdDevi; + CoordBBox bbox; }; __hostdev__ static NodeInfo get(const NanoRoot& root) { @@ -8173,7 +8186,7 @@ struct GetNodeInfo } __hostdev__ static NodeInfo get(const typename NanoRoot::Tile& tile) { - return NodeInfo{3u, NanoUpper::DIM, tile.value, tile.value, tile.value, 0, CoordBBox::createCube(tile.origin(), NanoUpper::DIM)}; + return NodeInfo{3u, NanoUpper::DIM, tile.value, tile.value, static_cast(tile.value), 0, CoordBBox::createCube(tile.origin(), NanoUpper::DIM)}; } __hostdev__ static NodeInfo get(const NanoUpper& node, uint32_t n) { diff --git a/nanovdb/nanovdb/util/GridStats.h b/nanovdb/nanovdb/util/GridStats.h index 1d4a85f0a5..267e7462e3 100644 --- a/nanovdb/nanovdb/util/GridStats.h +++ b/nanovdb/nanovdb/util/GridStats.h @@ -580,20 +580,8 @@ void GridStats::process( GridT &grid ) // then indeBBox = (0,0,0) -> (0,0,0) and then worldBBox = (0.0, 0.0, 0.0) // -> (1.0, 1.0, 1.0). This is a consequence of the different definitions // of index and world bounding boxes inherited from OpenVDB! 
- const Coord min = indexBBox[0]; - const Coord max = indexBBox[1] + Coord(1); - - auto& wBBox = data.mWorldBBox; - const auto& map = grid.map(); - wBBox[0] = wBBox[1] = map.applyMap(Vec3d(min[0], min[1], min[2])); - wBBox.expand(map.applyMap(Vec3d(min[0], min[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(min[0], max[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], min[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], max[1], min[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], min[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(min[0], max[1], max[2]))); - wBBox.expand(map.applyMap(Vec3d(max[0], max[1], max[2]))); - data.setBBoxOn(true); + grid.mWorldBBox = CoordBBox(indexBBox[0], indexBBox[1].offsetBy(1)).transform(grid.map()); + grid.setBBoxOn(true); } // set bit flags diff --git a/nanovdb/nanovdb/util/IO.h b/nanovdb/nanovdb/util/IO.h index a962a3bcdb..5d51cb53c6 100644 --- a/nanovdb/nanovdb/util/IO.h +++ b/nanovdb/nanovdb/util/IO.h @@ -751,6 +751,15 @@ inline uint64_t stringHash(const char* c_str) } // namespace io +template +inline std::ostream& +operator<<(std::ostream& os, const BBox>& b) +{ + os << "(" << b[0][0] << "," << b[0][1] << "," << b[0][2] << ") -> " + << "(" << b[1][0] << "," << b[1][1] << "," << b[1][2] << ")"; + return os; +} + inline std::ostream& operator<<(std::ostream& os, const CoordBBox& b) { @@ -774,6 +783,14 @@ operator<<(std::ostream& os, const Vec3& v) return os; } +template +inline std::ostream& +operator<<(std::ostream& os, const Vec4& v) +{ + os << "(" << v[0] << "," << v[1] << "," << v[2] << "," << v[3] << ")"; + return os; +} + } // namespace nanovdb #endif // NANOVDB_IO_H_HAS_BEEN_INCLUDED diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index 30f6b5d378..b792bf1834 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -121,7 +121,7 @@ cudaPointsToGrid(const PtrT dWorldPoints, int 
pointCount, double voxelSize = 1.0, PointType type = PointType::Default, - BufferT &buffer = BufferT(), + const BufferT &buffer = BufferT(), cudaStream_t stream = 0); //================================================================================================ @@ -1089,7 +1089,7 @@ inline void CudaPointsToGrid::processBBox() template GridHandle// Grid with PointType coordinates as blind data -cudaPointsToGrid(const PtrT d_xyz, int pointCount, double voxelSize, PointType type, BufferT &buffer, cudaStream_t stream) +cudaPointsToGrid(const PtrT d_xyz, int pointCount, double voxelSize, PointType type, const BufferT &buffer, cudaStream_t stream) { CudaPointsToGrid converter(voxelSize, Vec3d(0.0), stream); converter.setPointType(type); From 490788e1cb638a0b38ca927932ff224de57e4a51 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 25 Oct 2023 12:43:55 -0700 Subject: [PATCH 46/49] fixed bug in BBox::ITerator::end() Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index 4d1bd07781..87f387b6cb 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -2399,7 +2399,7 @@ struct BBox : public BaseBBox __hostdev__ const CoordT& operator*() const { return mPos; } }; // Iterator __hostdev__ Iterator begin() const { return Iterator{*this}; } - __hostdev__ Iterator end() const { return Iterator{*this, this->max().offsetBy(1)}; } + __hostdev__ Iterator end() const { return Iterator{*this, this->max().offsetBy(1,0,0)}; } __hostdev__ BBox() : BaseT(CoordT::max(), CoordT::min()) { From 79f82417944799af0449b5136bfc8a6fb54ddd94 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 25 Oct 2023 13:08:34 -0700 Subject: [PATCH 47/49] fixed bug in BBox::ITerator::end() Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h 
b/nanovdb/nanovdb/NanoVDB.h index 87f387b6cb..a77a0f2dd7 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -2374,11 +2374,11 @@ struct BBox : public BaseBBox if (mPos[2] < mBBox[1][2]) { // this is the most common case ++mPos[2]; } else if (mPos[1] < mBBox[1][1]) { - mPos[2] = mBBox[0][2]; + mPos[2] = mBBox[0][2];// reset ++mPos[1]; } else if (mPos[0] <= mBBox[1][0]) { - mPos[2] = mBBox[0][2]; - mPos[1] = mBBox[0][1]; + mPos[2] = mBBox[0][2];// reset + mPos[1] = mBBox[0][1];// reset ++mPos[0]; } return *this; @@ -2399,7 +2399,7 @@ struct BBox : public BaseBBox __hostdev__ const CoordT& operator*() const { return mPos; } }; // Iterator __hostdev__ Iterator begin() const { return Iterator{*this}; } - __hostdev__ Iterator end() const { return Iterator{*this, this->max().offsetBy(1,0,0)}; } + __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } __hostdev__ BBox() : BaseT(CoordT::max(), CoordT::min()) { From 5005cc6d8f54012dec95290857f2b367fa569243 Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Wed, 25 Oct 2023 13:52:42 -0700 Subject: [PATCH 48/49] added more unit-tests Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 17 +++++++++++------ nanovdb/nanovdb/unittest/TestNanoVDB.cc | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index a77a0f2dd7..dbda74cf55 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -2372,14 +2372,14 @@ struct BBox : public BaseBBox __hostdev__ Iterator& operator++() { if (mPos[2] < mBBox[1][2]) { // this is the most common case - ++mPos[2]; + ++mPos[2];// increment z } else if (mPos[1] < mBBox[1][1]) { - mPos[2] = mBBox[0][2];// reset - ++mPos[1]; + mPos[2] = mBBox[0][2];// reset z + ++mPos[1];// increment y } else if (mPos[0] <= mBBox[1][0]) { - mPos[2] = mBBox[0][2];// reset - mPos[1] = mBBox[0][1];// reset - ++mPos[0]; + mPos[2] = 
mBBox[0][2];// reset z + mPos[1] = mBBox[0][1];// reset y + ++mPos[0];// increment x } return *this; } @@ -2394,6 +2394,11 @@ struct BBox : public BaseBBox NANOVDB_ASSERT(mBBox == rhs.mBBox); return mPos == rhs.mPos; } + __hostdev__ bool operator!=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos != rhs.mPos; + } /// @brief Return @c true if the iterator still points to a valid coordinate. __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } __hostdev__ const CoordT& operator*() const { return mPos; } diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index 8a244b7f98..ec3837b61e 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -1124,11 +1124,14 @@ TEST_F(TestNanoVDB, CoordBBox) { // test prefix iterator auto iter = bbox.begin(); EXPECT_TRUE(iter); + EXPECT_FALSE(bbox.end()); + EXPECT_NE(iter, bbox.end()); for (int i = bbox.min()[0]; i <= bbox.max()[0]; ++i) { for (int j = bbox.min()[1]; j <= bbox.max()[1]; ++j) { for (int k = bbox.min()[2]; k <= bbox.max()[2]; ++k) { EXPECT_TRUE(bbox.isInside(*iter)); EXPECT_TRUE(iter); + EXPECT_NE(iter, bbox.end()); const auto& ijk = *iter; // note, copy by reference EXPECT_EQ(ijk[0], i); EXPECT_EQ(ijk[1], j); @@ -1138,15 +1141,19 @@ TEST_F(TestNanoVDB, CoordBBox) } } EXPECT_FALSE(iter); + EXPECT_EQ(iter, bbox.end()); } { // test postfix iterator auto iter = bbox.begin(); EXPECT_TRUE(iter); + EXPECT_FALSE(bbox.end()); + EXPECT_NE(iter, bbox.end()); for (int i = bbox.min()[0]; i <= bbox.max()[0]; ++i) { for (int j = bbox.min()[1]; j <= bbox.max()[1]; ++j) { for (int k = bbox.min()[2]; k <= bbox.max()[2]; ++k) { EXPECT_TRUE(iter); + EXPECT_NE(iter, bbox.end()); const auto ijk = *iter++; // note, copy by value! 
EXPECT_EQ(ijk[0], i); EXPECT_EQ(ijk[1], j); @@ -1155,6 +1162,14 @@ TEST_F(TestNanoVDB, CoordBBox) } } EXPECT_FALSE(iter); + EXPECT_EQ(iter, bbox.end()); + } + + {// test two approaches to iteration + auto iter1 = bbox.begin(), iter2 = bbox.begin(); + while(iter1 != bbox.end()) ++iter1; + while(iter2) ++iter2; + EXPECT_EQ(iter1, iter2); } {// test CoordBBox::createCube From d5aac5cae2ab537dfe731e19a97632643d43da4d Mon Sep 17 00:00:00 2001 From: Ken Museth Date: Thu, 26 Oct 2023 16:50:19 -0700 Subject: [PATCH 49/49] fixes typos and windows build issues Signed-off-by: Ken Museth --- nanovdb/nanovdb/NanoVDB.h | 54 +++++++---- nanovdb/nanovdb/unittest/TestNanoVDB.cc | 14 ++- nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh | 2 +- .../nanovdb/util/cuda/CudaPointsToGrid.cuh | 94 ++++++++++++++----- 4 files changed, 114 insertions(+), 50 deletions(-) diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h index dbda74cf55..1b770e901e 100644 --- a/nanovdb/nanovdb/NanoVDB.h +++ b/nanovdb/nanovdb/NanoVDB.h @@ -603,7 +603,7 @@ struct remove_pointer {using type = T;}; // --------------------------> match_const <------------------------------------ /// @brief Trait used to transfer the const-ness of a reference type to another type -/// @tparam T Type whoes const-ness needs to match the reference type +/// @tparam T Type whose const-ness needs to match the reference type /// @tparam ReferenceT Reference type that is not const /// @details match_const::type = int /// match_const::type = int @@ -856,7 +856,7 @@ __hostdev__ inline bool isIndex(GridType gridType) { return gridType == GridType::Index ||// index both active and inactive values gridType == GridType::OnIndex ||// index active values only - gridType == GridType::IndexMask ||// as Index, but with an additionl mask + gridType == GridType::IndexMask ||// as Index, but with an additional mask gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask } @@ -864,7 +864,7 @@ __hostdev__ inline bool 
isIndex(GridType gridType) /// @brief copy 64 bit words from @c src to @c dst /// @param dst pointer to destination -/// @param src pointer to soruce +/// @param src pointer to source /// @param word_count number of 64 bit words to be copied /// @return destination pointer /// @warning @c src and @c dst cannot overlap and should both be 64 bit aligned @@ -1373,16 +1373,20 @@ class Coord /// @brief Return true if this Coord is lexicographically less than the given Coord. __hostdev__ bool operator<(const Coord& rhs) const { - return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false - : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false : mVec[2] < rhs[2] ? true : false; } /// @brief Return true if this Coord is lexicographically less or equal to the given Coord. __hostdev__ bool operator<=(const Coord& rhs) const { - return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false - : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false : mVec[2] <=rhs[2] ? true : false; } @@ -1514,7 +1518,7 @@ class Coord /// @brief Return a double precision floating-point vector of this coordinate __hostdev__ inline Vec3 asVec3d() const; - // returns a copy of itself, so it minics the behaviour of Vec3::round() + // returns a copy of itself, so it mimics the behaviour of Vec3::round() __hostdev__ inline Coord round() const { return *this; } }; // Coord class @@ -2250,7 +2254,7 @@ struct BaseBBox return *this; } - /// @brief Expand this bounding box to enclode the given bounding box. + /// @brief Expand this bounding box to enclose the given bounding box. 
__hostdev__ BaseBBox& expand(const BaseBBox& bbox) { mCoord[0].minComponent(bbox[0]); @@ -2399,12 +2403,22 @@ struct BBox : public BaseBBox NANOVDB_ASSERT(mBBox == rhs.mBBox); return mPos != rhs.mPos; } + __hostdev__ bool operator<(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos < rhs.mPos; + } + __hostdev__ bool operator<=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos <= rhs.mPos; + } /// @brief Return @c true if the iterator still points to a valid coordinate. - __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } + __hostdev__ operator bool() const { return mPos <= mBBox[1]; } __hostdev__ const CoordT& operator*() const { return mPos; } }; // Iterator __hostdev__ Iterator begin() const { return Iterator{*this}; } - __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } + __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } __hostdev__ BBox() : BaseT(CoordT::max(), CoordT::min()) { @@ -3227,7 +3241,7 @@ struct Map /// @note Typically this operation is used for the world -> index mapping /// @tparam Vec3T Template type of the 3D vector to be mapped /// @param xyz 3D vector to be mapped - typically floating point world coordinates - /// @return Inverse afine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 + /// @return Inverse affine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { @@ -3238,7 +3252,7 @@ struct Map /// @note Typically this operation is used for the world -> index mapping /// @tparam Vec3T Template type of the 3D vector to be mapped /// @param xyz 3D vector to be mapped - typically floating point world coordinates - /// @return Inverse afine mapping of the input @c xyz i.e. 
(xyz - translation) x mat^-1 + /// @return Inverse affine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { @@ -3600,7 +3614,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ const uint8_t* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData //__hostdev__ const TreeData* treePtr() const { return reinterpret_cast(this + 1); }// TreeData is always right after GridData - /// @brief Return a non-const uint8_t pointer to the firsr node at @c LEVEL + /// @brief Return a non-const uint8_t pointer to the first node at @c LEVEL /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node /// @warning If not nodes exist at @c LEVEL NULL is returned template @@ -3612,7 +3626,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData return nodeOffset ? PtrAdd(treeData, nodeOffset) : nullptr; } - /// @brief Return a non-const uint8_t pointer to the firsr node at @c LEVEL + /// @brief Return a non-const uint8_t pointer to the first node at @c LEVEL /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node /// @warning If not nodes exist at @c LEVEL NULL is returned template @@ -4691,7 +4705,7 @@ class RootNode : public RootData } template - // __hostdev__ auto // occationally fails with NVCC + // __hostdev__ auto // occasionally fails with NVCC __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) set(const CoordType& ijk, ArgsT&&... args) { @@ -4816,7 +4830,7 @@ class RootNode : public RootData } template - // __hostdev__ auto // occationally fails with NVCC + // __hostdev__ auto // occasionally fails with NVCC __hostdev__ decltype(OpT::set(std::declval(), std::declval()...)) setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... 
args) { @@ -5287,7 +5301,7 @@ class InternalNode : public InternalData } template - //__hostdev__ auto // occationally fails with NVCC + //__hostdev__ auto // occasionally fails with NVCC __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) set(const CoordType& ijk, ArgsT&&... args) { @@ -5396,7 +5410,7 @@ class InternalNode : public InternalData } template - //__hostdev__ auto // occationally fails with NVCC + //__hostdev__ auto // occasionally fails with NVCC __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) { @@ -6371,7 +6385,7 @@ class LeafNode : public LeafData } template - //__hostdev__ auto // occationally fails with NVCC + //__hostdev__ auto // occasionally fails with NVCC __hostdev__ decltype(OpT::set(std::declval(), std::declval(), std::declval()...)) setAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) { diff --git a/nanovdb/nanovdb/unittest/TestNanoVDB.cc b/nanovdb/nanovdb/unittest/TestNanoVDB.cc index ec3837b61e..aa84b99202 100644 --- a/nanovdb/nanovdb/unittest/TestNanoVDB.cc +++ b/nanovdb/nanovdb/unittest/TestNanoVDB.cc @@ -1165,11 +1165,15 @@ TEST_F(TestNanoVDB, CoordBBox) EXPECT_EQ(iter, bbox.end()); } - {// test two approaches to iteration - auto iter1 = bbox.begin(), iter2 = bbox.begin(); - while(iter1 != bbox.end()) ++iter1; - while(iter2) ++iter2; - EXPECT_EQ(iter1, iter2); + {// test different approaches to iteration + auto it1 = bbox.begin(), it2 = bbox.begin(), it3 = bbox.begin(), it4 = bbox.begin(); + while(it1 != bbox.end()) ++it1; + while(it2) ++it2; + while(it3 < bbox.end()) ++it3; + while(*it4 <= bbox.max()) ++it4; + EXPECT_EQ(it1, it2); + EXPECT_EQ(it2, it3); + EXPECT_EQ(it3, it4); } {// test CoordBBox::createCube diff --git a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh index 941392d00a..5446c56231 100644 --- 
a/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaGridHandle.cuh @@ -127,7 +127,7 @@ cudaMergeGridHandles(const VectorT> &handles, const BufferT* template class VectorT> inline typename enable_if::hasDeviceDual, GridHandle>::type mergeDeviceGrids(const VectorT> &handles, const BufferT* other = nullptr, cudaStream_t stream = 0) -{ return cudaMergeGridHandles(handles, other, stream); } +{ return cudaMergeGridHandles(handles, other, stream); } } // namespace nanovdb diff --git a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh index b792bf1834..a1d5150de5 100644 --- a/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh +++ b/nanovdb/nanovdb/util/cuda/CudaPointsToGrid.cuh @@ -384,6 +384,70 @@ private: BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer); };// CudaPointsToGrid + +namespace kernels { +/// @details Used by CudaPointsToGrid::processLeafNodes before the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the cudaLambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +/// The lambda was previously launched through the cudaLambdaKernel wrapper defined in CudaUtils.h. +template +__global__ void fillValueIndexKernel(const size_t numItems, uint64_t* devValueIndex, typename CudaPointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) + return; + + devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); +} + +/// @details Used by CudaPointsToGrid::processLeafNodes for the computation +/// of prefix-sum for index grid.
+/// Moving this away from an implementation using the cudaLambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +template +__global__ void leafPrefixSumKernel(const size_t numItems, uint64_t* devValueIndexPrefix, typename CudaPointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) + return; + + auto &leaf = d_data->getLeaf(tid); + leaf.mOffset = 1u;// will be re-set below + const uint64_t *w = leaf.mValueMask.words(); + uint64_t &prefixSum = leaf.mPrefixSum, sum = CountOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += CountOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } + if (tid==0) { + d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count + d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; + } else { + leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 + } +} + +/// @details Used by CudaPointsToGrid::processLeafNodes to make sure leaf.mMask = leaf.mValueMask. +/// Moving this away from an implementation using the cudaLambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement.
+template +__global__ void setMaskEqValMaskKernel(const size_t numItems, typename CudaPointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) + return; + + auto &leaf = d_data->getLeaf(tid); + leaf.mMask = leaf.mValueMask; +} +} // namespace kernels + + //================================================================================================ // Define utility macro used to call cub functions that use dynamic temporary storage @@ -923,37 +987,19 @@ inline void CudaPointsToGrid::processLeafNodes(const PtrT points if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { - devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); - }, mDeviceData); cudaCheckError(); + kernels::fillValueIndexKernel<<>>(mData.nodeCount[0], devValueIndex, mDeviceData); + cudaCheckError(); CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); mMemPool.free(devValueIndex); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { - auto &leaf = d_data->getLeaf(tid); - leaf.mOffset = 1u;// will be re-set below - const uint64_t *w = leaf.mValueMask.words(); - uint64_t &prefixSum = leaf.mPrefixSum, sum = CountOn(*w++); - prefixSum = sum; - for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 - sum += CountOn(*w++); - prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits - } - if (tid==0) { - d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count - d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; - } else { - leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 - } - }, mDeviceData); 
cudaCheckError(); + kernels::leafPrefixSumKernel<<>>(mData.nodeCount[0], devValueIndexPrefix, mDeviceData); + cudaCheckError(); mMemPool.free(devValueIndexPrefix); } if constexpr(BuildTraits::is_indexmask) { if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); - cudaLambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { - auto &leaf = d_data->getLeaf(tid); - leaf.mMask = leaf.mValueMask; - }, mDeviceData); cudaCheckError(); + kernels::setMaskEqValMaskKernel<<>>(mData.nodeCount[0], mDeviceData); + cudaCheckError(); } if (mVerbose==2) mTimer.stop(); }// CudaPointsToGrid::processLeafNodes